rav1e/
deblock.rs

1// Copyright (c) 2018-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use crate::api::FrameType;
11use crate::color::ChromaSampling::Cs400;
12use crate::context::*;
13use crate::encoder::FrameInvariants;
14use crate::partition::RefType::*;
15use crate::predict::PredictionMode::*;
16use crate::quantize::*;
17use crate::tiling::*;
18use crate::util::{clamp, ILog, Pixel};
19use crate::DeblockState;
20use rayon::iter::*;
21use std::cmp;
22
23fn deblock_adjusted_level(
24  deblock: &DeblockState, block: &Block, pli: usize, vertical: bool,
25) -> usize {
26  let idx = if pli == 0 { usize::from(!vertical) } else { pli + 1 };
27
28  let level = if deblock.block_deltas_enabled {
29    // By-block filter strength delta, if the feature is active.
30    let block_delta = if deblock.block_delta_multi {
31      block.deblock_deltas[idx] << deblock.block_delta_shift
32    } else {
33      block.deblock_deltas[0] << deblock.block_delta_shift
34    };
35
36    // Add to frame-specified filter strength (Y-vertical, Y-horizontal, U, V)
37    clamp(block_delta + deblock.levels[idx] as i8, 0, MAX_LOOP_FILTER as i8)
38      as u8
39  } else {
40    deblock.levels[idx]
41  };
42
43  // if fi.seg_feaure_active {
44  // rav1e does not yet support segments or segment features
45  // }
46
47  // Are delta modifiers for specific references and modes active?  If so, add them too.
48  if deblock.deltas_enabled {
49    let mode = block.mode;
50    let reference = block.ref_frames[0];
51    let mode_type = usize::from(
52      mode >= NEARESTMV && mode != GLOBALMV && mode != GLOBAL_GLOBALMV,
53    );
54    let l5 = level >> 5;
55    clamp(
56      level as i32
57        + ((deblock.ref_deltas[reference.to_index()] as i32) << l5)
58        + if reference == INTRA_FRAME {
59          0
60        } else {
61          (deblock.mode_deltas[mode_type] as i32) << l5
62        },
63      0,
64      MAX_LOOP_FILTER as i32,
65    ) as usize
66  } else {
67    level as usize
68  }
69}
70
71#[inline]
72fn deblock_left<'a, T: Pixel>(
73  blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>,
74) -> &'a Block {
75  let xdec = p.plane_cfg.xdec;
76  let ydec = p.plane_cfg.ydec;
77
78  // subsampled chroma uses odd mi row/col
79  // We already know we're not at the upper/left corner, so prev_block is in frame
80  &blocks[in_bo.0.y | ydec][(in_bo.0.x | xdec) - (1 << xdec)]
81}
82
83#[inline]
84fn deblock_up<'a, T: Pixel>(
85  blocks: &'a TileBlocks, in_bo: TileBlockOffset, p: &PlaneRegion<T>,
86) -> &'a Block {
87  let xdec = p.plane_cfg.xdec;
88  let ydec = p.plane_cfg.ydec;
89
90  // subsampled chroma uses odd mi row/col
91  &blocks[(in_bo.0.y | ydec) - (1 << ydec)][in_bo.0.x | xdec]
92}
93
94// Must be called on a tx edge, and not on a frame edge.  This is enforced above the call.
95fn deblock_size<T: Pixel>(
96  block: &Block, prev_block: &Block, p: &PlaneRegion<T>, pli: usize,
97  vertical: bool, block_edge: bool,
98) -> usize {
99  let xdec = p.plane_cfg.xdec;
100  let ydec = p.plane_cfg.ydec;
101
102  // filter application is conditional on skip and block edge
103  if !(block_edge
104    || !block.skip
105    || !prev_block.skip
106    || block.ref_frames[0] == INTRA_FRAME
107    || prev_block.ref_frames[0] == INTRA_FRAME)
108  {
109    0
110  } else {
111    let (txsize, prev_txsize) = if pli == 0 {
112      (block.txsize, prev_block.txsize)
113    } else {
114      (
115        block.bsize.largest_chroma_tx_size(xdec, ydec),
116        prev_block.bsize.largest_chroma_tx_size(xdec, ydec),
117      )
118    };
119    let (tx_n, prev_tx_n) = if vertical {
120      (cmp::max(txsize.width_mi(), 1), cmp::max(prev_txsize.width_mi(), 1))
121    } else {
122      (cmp::max(txsize.height_mi(), 1), cmp::max(prev_txsize.height_mi(), 1))
123    };
124    cmp::min(
125      if pli == 0 { 14 } else { 6 },
126      cmp::min(tx_n, prev_tx_n) << MI_SIZE_LOG2,
127    )
128  }
129}
130
131// Must be called on a tx edge
132#[inline]
133fn deblock_level(
134  deblock: &DeblockState, block: &Block, prev_block: &Block, pli: usize,
135  vertical: bool,
136) -> usize {
137  let level = deblock_adjusted_level(deblock, block, pli, vertical);
138  if level == 0 {
139    deblock_adjusted_level(deblock, prev_block, pli, vertical)
140  } else {
141    level
142  }
143}
144
145// four taps, 4 outputs (two are trivial)
146#[inline]
147fn filter_narrow2_4(
148  p1: i32, p0: i32, q0: i32, q1: i32, shift: usize,
149) -> [i32; 4] {
150  let filter0 = clamp(p1 - q1, -128 << shift, (128 << shift) - 1);
151  let filter1 =
152    clamp(filter0 + 3 * (q0 - p0) + 4, -128 << shift, (128 << shift) - 1) >> 3;
153  // be certain our optimization removing a clamp is sound
154  debug_assert!({
155    let base =
156      clamp(filter0 + 3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
157    let test = clamp(base + 4, -128 << shift, (128 << shift) - 1) >> 3;
158    filter1 == test
159  });
160  let filter2 =
161    clamp(filter0 + 3 * (q0 - p0) + 3, -128 << shift, (128 << shift) - 1) >> 3;
162  // be certain our optimization removing a clamp is sound
163  debug_assert!({
164    let base =
165      clamp(filter0 + 3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
166    let test = clamp(base + 3, -128 << shift, (128 << shift) - 1) >> 3;
167    filter2 == test
168  });
169  [
170    p1,
171    clamp(p0 + filter2, 0, (256 << shift) - 1),
172    clamp(q0 - filter1, 0, (256 << shift) - 1),
173    q1,
174  ]
175}
176
177// six taps, 6 outputs (four are trivial)
178#[inline]
179fn filter_narrow2_6(
180  p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize,
181) -> [i32; 6] {
182  let x = filter_narrow2_4(p1, p0, q0, q1, shift);
183  [p2, x[0], x[1], x[2], x[3], q2]
184}
185
186// 12 taps, 12 outputs (ten are trivial)
187#[inline]
188fn filter_narrow2_12(
189  p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32,
190  q2: i32, q3: i32, q4: i32, q5: i32, shift: usize,
191) -> [i32; 12] {
192  let x = filter_narrow2_4(p1, p0, q0, q1, shift);
193  [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5]
194}
195
196// four taps, 4 outputs
197#[inline]
198fn filter_narrow4_4(
199  p1: i32, p0: i32, q0: i32, q1: i32, shift: usize,
200) -> [i32; 4] {
201  let filter1 =
202    clamp(3 * (q0 - p0) + 4, -128 << shift, (128 << shift) - 1) >> 3;
203  // be certain our optimization removing a clamp is sound
204  debug_assert!({
205    let base = clamp(3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
206    let test = clamp(base + 4, -128 << shift, (128 << shift) - 1) >> 3;
207    filter1 == test
208  });
209  let filter2 =
210    clamp(3 * (q0 - p0) + 3, -128 << shift, (128 << shift) - 1) >> 3;
211  // be certain our optimization removing a clamp is sound
212  debug_assert!({
213    let base = clamp(3 * (q0 - p0), -128 << shift, (128 << shift) - 1);
214    let test = clamp(base + 3, -128 << shift, (128 << shift) - 1) >> 3;
215    filter2 == test
216  });
217  let filter3 = (filter1 + 1) >> 1;
218  [
219    clamp(p1 + filter3, 0, (256 << shift) - 1),
220    clamp(p0 + filter2, 0, (256 << shift) - 1),
221    clamp(q0 - filter1, 0, (256 << shift) - 1),
222    clamp(q1 - filter3, 0, (256 << shift) - 1),
223  ]
224}
225
226// six taps, 6 outputs (two are trivial)
227#[inline]
228fn filter_narrow4_6(
229  p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize,
230) -> [i32; 6] {
231  let x = filter_narrow4_4(p1, p0, q0, q1, shift);
232  [p2, x[0], x[1], x[2], x[3], q2]
233}
234
235// 12 taps, 12 outputs (eight are trivial)
236#[inline]
237fn filter_narrow4_12(
238  p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32,
239  q2: i32, q3: i32, q4: i32, q5: i32, shift: usize,
240) -> [i32; 12] {
241  let x = filter_narrow4_4(p1, p0, q0, q1, shift);
242  [p5, p4, p3, p2, x[0], x[1], x[2], x[3], q2, q3, q4, q5]
243}
244
// six taps, 4 outputs
// Returns the filtered [p1, p0, q0, q1]; p2 and q2 are inputs only.
#[rustfmt::skip]
#[inline]
const fn filter_wide6_4(
  p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32
) -> [i32; 4] {
  // Every weight row sums to 8; ROUND is the half-step added before >> 3.
  const ROUND: i32 = 1 << 2;
  let new_p1 = 3 * p2 + 2 * p1 + 2 * p0 +     q0                   + ROUND;
  let new_p0 =     p2 + 2 * p1 + 2 * p0 + 2 * q0 +     q1          + ROUND;
  let new_q0 =              p1 + 2 * p0 + 2 * q0 + 2 * q1 +     q2 + ROUND;
  let new_q1 =                       p0 + 2 * q0 + 2 * q1 + 3 * q2 + ROUND;
  [new_p1 >> 3, new_p0 >> 3, new_q0 >> 3, new_q1 >> 3]
}
258
// eight taps, 6 outputs
// Returns the filtered [p2, p1, p0, q0, q1, q2]; p3 and q3 are inputs only.
#[rustfmt::skip]
#[inline]
const fn filter_wide8_6(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32
) -> [i32; 6] {
  // Every weight row sums to 8; ROUND is the half-step added before >> 3.
  const ROUND: i32 = 1 << 2;
  let new_p2 = 3 * p3 + 2 * p2 +     p1 +     p0 +     q0                            + ROUND;
  let new_p1 = 2 * p3 +     p2 + 2 * p1 +     p0 +     q0 +     q1                   + ROUND;
  let new_p0 =     p3 +     p2 +     p1 + 2 * p0 +     q0 +     q1 +     q2          + ROUND;
  let new_q0 =              p2 +     p1 +     p0 + 2 * q0 +     q1 +     q2 +     q3 + ROUND;
  let new_q1 =                       p1 +     p0 +     q0 + 2 * q1 +     q2 + 2 * q3 + ROUND;
  let new_q2 =                                p0 +     q0 +     q1 + 2 * q2 + 3 * q3 + ROUND;
  [new_p2 >> 3, new_p1 >> 3, new_p0 >> 3, new_q0 >> 3, new_q1 >> 3, new_q2 >> 3]
}
274
275// 12 taps, 12 outputs (six are trivial)
276#[inline]
277const fn filter_wide8_12(
278  p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32,
279  q2: i32, q3: i32, q4: i32, q5: i32,
280) -> [i32; 12] {
281  let x = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3);
282  [p5, p4, p3, x[0], x[1], x[2], x[3], x[4], x[5], q3, q4, q5]
283}
284
// fourteen taps, 12 outputs
// Returns the filtered [p5 ..= p0, q0 ..= q5]; p6 and q6 are inputs only.
#[rustfmt::skip]
#[inline]
const fn filter_wide14_12(
  p6: i32, p5: i32, p4: i32, p3: i32, p2: i32, p1: i32, p0: i32, q0: i32,
  q1: i32, q2: i32, q3: i32, q4: i32, q5: i32, q6: i32
) -> [i32; 12] {
  // Every weight row sums to 16; ROUND is the half-step added before >> 4.
  const ROUND: i32 = 1 << 3;
  let new_p5 = 7 * p6 + 2 * p5 + 2 * p4 + p3 + p2 + p1 + p0 + q0 + ROUND;
  let new_p4 = 5 * p6 + 2 * p5 + 2 * p4 + 2 * p3 + p2 + p1 + p0 + q0 + q1 + ROUND;
  let new_p3 = 4 * p6 + p5 + 2 * p4 + 2 * p3 + 2 * p2 + p1 + p0 + q0 + q1 + q2 + ROUND;
  let new_p2 = 3 * p6 + p5 + p4 + 2 * p3 + 2 * p2 + 2 * p1 + p0 + q0 + q1 + q2 + q3 + ROUND;
  let new_p1 = 2 * p6 + p5 + p4 + p3 + 2 * p2 + 2 * p1 + 2 * p0 + q0 + q1 + q2 + q3 + q4 + ROUND;
  let new_p0 = p6 + p5 + p4 + p3 + p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + q2 + q3 + q4 + q5 + ROUND;
  let new_q0 = p5 + p4 + p3 + p2 + p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + q3 + q4 + q5 + q6 + ROUND;
  let new_q1 = p4 + p3 + p2 + p1 + p0 + 2 * q0 + 2 * q1 + 2 * q2 + q3 + q4 + q5 + 2 * q6 + ROUND;
  let new_q2 = p3 + p2 + p1 + p0 + q0 + 2 * q1 + 2 * q2 + 2 * q3 + q4 + q5 + 3 * q6 + ROUND;
  let new_q3 = p2 + p1 + p0 + q0 + q1 + 2 * q2 + 2 * q3 + 2 * q4 + q5 + 4 * q6 + ROUND;
  let new_q4 = p1 + p0 + q0 + q1 + q2 + 2 * q3 + 2 * q4 + 2 * q5 + 5 * q6 + ROUND;
  let new_q5 = p0 + q0 + q1 + q2 + q3 + 2 * q4 + 2 * q5 + 7 * q6 + ROUND;
  [
    new_p5 >> 4, new_p4 >> 4, new_p3 >> 4, new_p2 >> 4, new_p1 >> 4, new_p0 >> 4,
    new_q0 >> 4, new_q1 >> 4, new_q2 >> 4, new_q3 >> 4, new_q4 >> 4, new_q5 >> 4,
  ]
}
307
308#[inline]
309fn copy_horizontal<T: Pixel>(
310  dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32],
311) {
312  let row = &mut dst[y][x..];
313  for (dst, src) in row.iter_mut().take(src.len()).zip(src) {
314    *dst = T::cast_from(*src);
315  }
316}
317
318#[inline]
319fn copy_vertical<T: Pixel>(
320  dst: &mut PlaneRegionMut<'_, T>, x: usize, y: usize, src: &[i32],
321) {
322  for (i, v) in src.iter().enumerate() {
323    let p = &mut dst[y + i][x];
324    *p = T::cast_from(*v);
325  }
326}
327
// Sum of squared differences between two equal-length tap arrays.
// The sum is accumulated in i32 and only widened to i64 at the end.
#[inline]
fn stride_sse<const LEN: usize>(a: &[i32; LEN], b: &[i32; LEN]) -> i64 {
  let mut total: i32 = 0;
  for (x, y) in a.iter().zip(b.iter()) {
    let diff = x - y;
    total += diff * diff;
  }
  i64::from(total)
}
332
// Scales a filter level up by `shift` bits to get the matching limit.
#[inline]
const fn _level_to_limit(level: i32, shift: usize) -> i32 {
  let limit = level << shift;
  limit
}
337
// Scales a limit down by `shift` bits, rounding up.
#[inline]
const fn limit_to_level(limit: i32, shift: usize) -> i32 {
  // Adding (1 << shift) - 1 first turns the truncating shift into a ceiling.
  let biased = limit + (1 << shift) - 1;
  biased >> shift
}
342
// Computes (3 * level + 4) scaled up by `shift` bits.
#[inline]
const fn _level_to_blimit(level: i32, shift: usize) -> i32 {
  let unscaled = 3 * level + 4;
  unscaled << shift
}
347
// Scales `blimit` down by `shift` bits (rounding up), then subtracts 2 and
// divides by 3, truncating toward zero.
#[inline]
const fn blimit_to_level(blimit: i32, shift: usize) -> i32 {
  let unscaled = (blimit + (1 << shift) - 1) >> shift;
  (unscaled - 2) / 3
}
352
// Drops the low four bits of `level`, then scales up by `shift` bits.
#[inline]
const fn _level_to_thresh(level: i32, shift: usize) -> i32 {
  let coarse = level >> 4;
  coarse << shift
}
357
// Scales `thresh` down by `shift` bits (rounding up), then multiplies by 16.
#[inline]
const fn thresh_to_level(thresh: i32, shift: usize) -> i32 {
  let unscaled = (thresh + (1 << shift) - 1) >> shift;
  unscaled << 4
}
362
363#[inline]
364fn nhev4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize {
365  thresh_to_level(cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift) as usize
366}
367
368#[inline]
369fn mask4(p1: i32, p0: i32, q0: i32, q1: i32, shift: usize) -> usize {
370  cmp::max(
371    limit_to_level(cmp::max((p1 - p0).abs(), (q1 - q0).abs()), shift),
372    blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift),
373  ) as usize
374}
375
376#[inline]
377fn deblock_size4_inner(
378  [p1, p0, q0, q1]: [i32; 4], level: usize, bd: usize,
379) -> Option<[i32; 4]> {
380  if mask4(p1, p0, q0, q1, bd - 8) <= level {
381    let x = if nhev4(p1, p0, q0, q1, bd - 8) <= level {
382      filter_narrow4_4(p1, p0, q0, q1, bd - 8)
383    } else {
384      filter_narrow2_4(p1, p0, q0, q1, bd - 8)
385    };
386    Some(x)
387  } else {
388    None
389  }
390}
391
392// Assumes rec[0] is set 2 taps back from the edge
393fn deblock_v_size4<T: Pixel>(
394  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
395) {
396  for y in 0..4 {
397    let p = &rec[y];
398    let vals = [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_()];
399    if let Some(data) = deblock_size4_inner(vals, level, bd) {
400      copy_horizontal(rec, 0, y, &data);
401    }
402  }
403}
404
405// Assumes rec[0] is set 2 taps back from the edge
406fn deblock_h_size4<T: Pixel>(
407  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
408) {
409  for x in 0..4 {
410    let vals =
411      [rec[0][x].as_(), rec[1][x].as_(), rec[2][x].as_(), rec[3][x].as_()];
412    if let Some(data) = deblock_size4_inner(vals, level, bd) {
413      copy_vertical(rec, x, 0, &data);
414    }
415  }
416}
417
418// Assumes rec[0] and src[0] are set 2 taps back from the edge.
419// Accesses four taps, accumulates four pixels into the tally
420fn sse_size4<T: Pixel>(
421  rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
422  tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
423) {
424  for i in 0..4 {
425    let (p1, p0, q0, q1, a) = if horizontal_p {
426      (
427        rec[0][i].as_(),
428        rec[1][i].as_(),
429        rec[2][i].as_(),
430        rec[3][i].as_(),
431        [src[0][i].as_(), src[1][i].as_(), src[2][i].as_(), src[3][i].as_()],
432      )
433    } else {
434      (
435        rec[i][0].as_(),
436        rec[i][1].as_(),
437        rec[i][2].as_(),
438        rec[i][3].as_(),
439        [src[i][0].as_(), src[i][1].as_(), src[i][2].as_(), src[i][3].as_()],
440      )
441    };
442
443    // three possibilities: no filter, narrow2 and narrow4
444    // All possibilities produce four outputs
445    let none: [_; 4] = [p1, p0, q0, q1];
446    let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8);
447    let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8);
448
449    // mask4 sets the dividing line for filter vs no filter
450    // nhev4 sets the dividing line between narrow2 and narrow4
451    let mask = clamp(mask4(p1, p0, q0, q1, bd - 8), 1, MAX_LOOP_FILTER + 1);
452    let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
453
454    // sse for each; short-circuit the 'special' no-op cases.
455    let sse_none = stride_sse(&a, &none);
456    let sse_narrow2 =
457      if nhev != mask { stride_sse(&a, &narrow2) } else { sse_none };
458    let sse_narrow4 = if nhev <= MAX_LOOP_FILTER {
459      stride_sse(&a, &narrow4)
460    } else {
461      sse_none
462    };
463
464    // accumulate possible filter values into the tally
465    // level 0 is a special case
466    tally[0] += sse_none;
467    tally[mask] -= sse_none;
468    tally[mask] += sse_narrow2;
469    tally[nhev] -= sse_narrow2;
470    tally[nhev] += sse_narrow4;
471  }
472}
473
474#[inline]
475fn mask6(
476  p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, shift: usize,
477) -> usize {
478  cmp::max(
479    limit_to_level(
480      cmp::max(
481        (p2 - p1).abs(),
482        cmp::max((p1 - p0).abs(), cmp::max((q2 - q1).abs(), (q1 - q0).abs())),
483      ),
484      shift,
485    ),
486    blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift),
487  ) as usize
488}
489
// Largest absolute deviation of p1, p2 from p0 and of q1, q2 from q0.
#[inline]
fn flat6(p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32) -> usize {
  let deviation = (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs());
  deviation as usize
}
497
498#[inline]
499fn deblock_size6_inner(
500  [p2, p1, p0, q0, q1, q2]: [i32; 6], level: usize, bd: usize,
501) -> Option<[i32; 4]> {
502  if mask6(p2, p1, p0, q0, q1, q2, bd - 8) <= level {
503    let flat = 1 << (bd - 8);
504    let x = if flat6(p2, p1, p0, q0, q1, q2) <= flat {
505      filter_wide6_4(p2, p1, p0, q0, q1, q2)
506    } else if nhev4(p1, p0, q0, q1, bd - 8) <= level {
507      filter_narrow4_4(p1, p0, q0, q1, bd - 8)
508    } else {
509      filter_narrow2_4(p1, p0, q0, q1, bd - 8)
510    };
511    Some(x)
512  } else {
513    None
514  }
515}
516
517// Assumes slice[0] is set 3 taps back from the edge
518fn deblock_v_size6<T: Pixel>(
519  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
520) {
521  for y in 0..4 {
522    let p = &rec[y];
523    let vals =
524      [p[0].as_(), p[1].as_(), p[2].as_(), p[3].as_(), p[4].as_(), p[5].as_()];
525    if let Some(data) = deblock_size6_inner(vals, level, bd) {
526      copy_horizontal(rec, 1, y, &data);
527    }
528  }
529}
530
531// Assumes slice[0] is set 3 taps back from the edge
532fn deblock_h_size6<T: Pixel>(
533  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
534) {
535  for x in 0..4 {
536    let vals = [
537      rec[0][x].as_(),
538      rec[1][x].as_(),
539      rec[2][x].as_(),
540      rec[3][x].as_(),
541      rec[4][x].as_(),
542      rec[5][x].as_(),
543    ];
544    if let Some(data) = deblock_size6_inner(vals, level, bd) {
545      copy_vertical(rec, x, 1, &data);
546    }
547  }
548}
549
550// Assumes rec[0] and src[0] are set 3 taps back from the edge.
551// Accesses six taps, accumulates four pixels into the tally
552fn sse_size6<T: Pixel>(
553  rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
554  tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
555) {
556  let flat = 1 << (bd - 8);
557  for i in 0..4 {
558    let (p2, p1, p0, q0, q1, q2, a) = if horizontal_p {
559      // six taps
560      (
561        rec[0][i].as_(),
562        rec[1][i].as_(),
563        rec[2][i].as_(),
564        rec[3][i].as_(),
565        rec[4][i].as_(),
566        rec[5][i].as_(),
567        // four pixels to compare so offset one forward
568        [src[1][i].as_(), src[2][i].as_(), src[3][i].as_(), src[4][i].as_()],
569      )
570    } else {
571      // six taps
572      (
573        rec[i][0].as_(),
574        rec[i][1].as_(),
575        rec[i][2].as_(),
576        rec[i][3].as_(),
577        rec[i][4].as_(),
578        rec[i][5].as_(),
579        // four pixels to compare so offset one forward
580        [src[i][1].as_(), src[i][2].as_(), src[i][3].as_(), src[i][4].as_()],
581      )
582    };
583
584    // Four possibilities: no filter, wide6, narrow2 and narrow4
585    // All possibilities produce four outputs
586    let none: [_; 4] = [p1, p0, q0, q1];
587    let wide6 = filter_wide6_4(p2, p1, p0, q0, q1, q2);
588    let narrow2 = filter_narrow2_4(p1, p0, q0, q1, bd - 8);
589    let narrow4 = filter_narrow4_4(p1, p0, q0, q1, bd - 8);
590
591    // mask6 sets the dividing line for filter vs no filter
592    // flat6 decides between wide and narrow filters (unrelated to level)
593    // nhev4 sets the dividing line between narrow2 and narrow4
594    let mask =
595      clamp(mask6(p2, p1, p0, q0, q1, q2, bd - 8), 1, MAX_LOOP_FILTER + 1);
596    let flatp = flat6(p2, p1, p0, q0, q1, q2) <= flat;
597    let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
598
599    // sse for each; short-circuit the 'special' no-op cases.
600    let sse_none = stride_sse(&a, &none);
601    let sse_wide6 = if flatp && mask <= MAX_LOOP_FILTER {
602      stride_sse(&a, &wide6)
603    } else {
604      sse_none
605    };
606    let sse_narrow2 =
607      if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none };
608    let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER {
609      stride_sse(&a, &narrow4)
610    } else {
611      sse_none
612    };
613
614    // accumulate possible filter values into the tally
615    tally[0] += sse_none;
616    tally[mask] -= sse_none;
617    if flatp {
618      tally[mask] += sse_wide6;
619    } else {
620      tally[mask] += sse_narrow2;
621      tally[nhev] -= sse_narrow2;
622      tally[nhev] += sse_narrow4;
623    }
624  }
625}
626
627#[inline]
628fn mask8(
629  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32,
630  shift: usize,
631) -> usize {
632  cmp::max(
633    limit_to_level(
634      cmp::max(
635        (p3 - p2).abs(),
636        cmp::max(
637          (p2 - p1).abs(),
638          cmp::max(
639            (p1 - p0).abs(),
640            cmp::max(
641              (q3 - q2).abs(),
642              cmp::max((q2 - q1).abs(), (q1 - q0).abs()),
643            ),
644          ),
645        ),
646      ),
647      shift,
648    ),
649    blimit_to_level((p0 - q0).abs() * 2 + (p1 - q1).abs() / 2, shift),
650  ) as usize
651}
652
// Largest absolute deviation of p1..p3 from p0 and of q1..q3 from q0.
#[inline]
fn flat8(
  p3: i32, p2: i32, p1: i32, p0: i32, q0: i32, q1: i32, q2: i32, q3: i32,
) -> usize {
  let deviation = (p1 - p0)
    .abs()
    .max((q1 - q0).abs())
    .max((p2 - p0).abs())
    .max((q2 - q0).abs())
    .max((p3 - p0).abs())
    .max((q3 - q0).abs());
  deviation as usize
}
668
669#[inline]
670fn deblock_size8_inner(
671  [p3, p2, p1, p0, q0, q1, q2, q3]: [i32; 8], level: usize, bd: usize,
672) -> Option<[i32; 6]> {
673  if mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8) <= level {
674    let flat = 1 << (bd - 8);
675    let x = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat {
676      filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3)
677    } else if nhev4(p1, p0, q0, q1, bd - 8) <= level {
678      filter_narrow4_6(p2, p1, p0, q0, q1, q2, bd - 8)
679    } else {
680      filter_narrow2_6(p2, p1, p0, q0, q1, q2, bd - 8)
681    };
682    Some(x)
683  } else {
684    None
685  }
686}
687
688// Assumes rec[0] is set 4 taps back from the edge
689fn deblock_v_size8<T: Pixel>(
690  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
691) {
692  for y in 0..4 {
693    let p = &rec[y];
694    let vals = [
695      p[0].as_(),
696      p[1].as_(),
697      p[2].as_(),
698      p[3].as_(),
699      p[4].as_(),
700      p[5].as_(),
701      p[6].as_(),
702      p[7].as_(),
703    ];
704    if let Some(data) = deblock_size8_inner(vals, level, bd) {
705      copy_horizontal(rec, 1, y, &data);
706    }
707  }
708}
709
710// Assumes rec[0] is set 4 taps back from the edge
711fn deblock_h_size8<T: Pixel>(
712  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
713) {
714  for x in 0..4 {
715    let vals = [
716      rec[0][x].as_(),
717      rec[1][x].as_(),
718      rec[2][x].as_(),
719      rec[3][x].as_(),
720      rec[4][x].as_(),
721      rec[5][x].as_(),
722      rec[6][x].as_(),
723      rec[7][x].as_(),
724    ];
725    if let Some(data) = deblock_size8_inner(vals, level, bd) {
726      copy_vertical(rec, x, 1, &data);
727    }
728  }
729}
730
731// Assumes rec[0] and src[0] are set 4 taps back from the edge.
732// Accesses eight taps, accumulates six pixels into the tally
733fn sse_size8<T: Pixel>(
734  rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
735  tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
736) {
737  let flat = 1 << (bd - 8);
738
739  for i in 0..4 {
740    let (p3, p2, p1, p0, q0, q1, q2, q3, a) = if horizontal_p {
741      // eight taps
742      (
743        rec[0][i].as_(),
744        rec[1][i].as_(),
745        rec[2][i].as_(),
746        rec[3][i].as_(),
747        rec[4][i].as_(),
748        rec[5][i].as_(),
749        rec[6][i].as_(),
750        rec[7][i].as_(),
751        // six pixels to compare so offset one forward
752        [
753          src[1][i].as_(),
754          src[2][i].as_(),
755          src[3][i].as_(),
756          src[4][i].as_(),
757          src[5][i].as_(),
758          src[6][i].as_(),
759        ],
760      )
761    } else {
762      // eight taps
763      (
764        rec[i][0].as_(),
765        rec[i][1].as_(),
766        rec[i][2].as_(),
767        rec[i][3].as_(),
768        rec[i][4].as_(),
769        rec[i][5].as_(),
770        rec[i][6].as_(),
771        rec[i][7].as_(),
772        // six pixels to compare so offset one forward
773        [
774          src[i][1].as_(),
775          src[i][2].as_(),
776          src[i][3].as_(),
777          src[i][4].as_(),
778          src[i][5].as_(),
779          src[i][6].as_(),
780        ],
781      )
782    };
783
784    // Four possibilities: no filter, wide8, narrow2 and narrow4
785    let none: [_; 6] = [p2, p1, p0, q0, q1, q2];
786    let wide8: [_; 6] = filter_wide8_6(p3, p2, p1, p0, q0, q1, q2, q3);
787    let narrow2: [_; 6] = filter_narrow2_6(p2, p1, p0, q0, q1, q2, bd - 8);
788    let narrow4: [_; 6] = filter_narrow4_6(p2, p1, p0, q0, q1, q2, bd - 8);
789
790    // mask8 sets the dividing line for filter vs no filter
791    // flat8 decides between wide and narrow filters (unrelated to level)
792    // nhev4 sets the dividing line between narrow2 and narrow4
793    let mask = clamp(
794      mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8),
795      1,
796      MAX_LOOP_FILTER + 1,
797    );
798    let flatp = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat;
799    let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
800
801    // sse for each; short-circuit the 'special' no-op cases.
802    let sse_none = stride_sse(&a, &none);
803    let sse_wide8 = if flatp && mask <= MAX_LOOP_FILTER {
804      stride_sse(&a, &wide8)
805    } else {
806      sse_none
807    };
808    let sse_narrow2 =
809      if !flatp && nhev != mask { stride_sse(&a, &narrow2) } else { sse_none };
810    let sse_narrow4 = if !flatp && nhev <= MAX_LOOP_FILTER {
811      stride_sse(&a, &narrow4)
812    } else {
813      sse_none
814    };
815
816    // accumulate possible filter values into the tally
817    tally[0] += sse_none;
818    tally[mask] -= sse_none;
819    if flatp {
820      tally[mask] += sse_wide8;
821    } else {
822      tally[mask] += sse_narrow2;
823      tally[nhev] -= sse_narrow2;
824      tally[nhev] += sse_narrow4;
825    }
826  }
827}
828
// Largest absolute deviation of the outer taps p4..p6 from p0 and of
// q4..q6 from q0.
#[inline]
fn flat14_outer(
  p6: i32, p5: i32, p4: i32, p0: i32, q0: i32, q4: i32, q5: i32, q6: i32,
) -> usize {
  let deviation = (p4 - p0)
    .abs()
    .max((q4 - q0).abs())
    .max((p5 - p0).abs())
    .max((q5 - q0).abs())
    .max((p6 - p0).abs())
    .max((q6 - q0).abs());
  deviation as usize
}
844
845#[inline]
846fn deblock_size14_inner(
847  [p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6]: [i32; 14],
848  level: usize, bd: usize,
849) -> Option<[i32; 12]> {
850  // 'mask' test
851  if mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8) <= level {
852    let flat = 1 << (bd - 8);
853    // inner flatness test
854    let x = if flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat {
855      // outer flatness test
856      if flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat {
857        // sufficient flatness across 14 pixel width; run full-width filter
858        filter_wide14_12(
859          p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6,
860        )
861      } else {
862        // only flat in inner area, run 8-tap
863        filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5)
864      }
865    } else if nhev4(p1, p0, q0, q1, bd - 8) <= level {
866      // not flat, run narrow filter
867      filter_narrow4_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8)
868    } else {
869      filter_narrow2_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, bd - 8)
870    };
871    Some(x)
872  } else {
873    None
874  }
875}
876
877// Assumes rec[0] is set 7 taps back from the edge
878fn deblock_v_size14<T: Pixel>(
879  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
880) {
881  for y in 0..4 {
882    let p = &rec[y];
883    let vals = [
884      p[0].as_(),
885      p[1].as_(),
886      p[2].as_(),
887      p[3].as_(),
888      p[4].as_(),
889      p[5].as_(),
890      p[6].as_(),
891      p[7].as_(),
892      p[8].as_(),
893      p[9].as_(),
894      p[10].as_(),
895      p[11].as_(),
896      p[12].as_(),
897      p[13].as_(),
898    ];
899    if let Some(data) = deblock_size14_inner(vals, level, bd) {
900      copy_horizontal(rec, 1, y, &data);
901    }
902  }
903}
904
905// Assumes rec[0] is set 7 taps back from the edge
906fn deblock_h_size14<T: Pixel>(
907  rec: &mut PlaneRegionMut<'_, T>, level: usize, bd: usize,
908) {
909  for x in 0..4 {
910    let vals = [
911      rec[0][x].as_(),
912      rec[1][x].as_(),
913      rec[2][x].as_(),
914      rec[3][x].as_(),
915      rec[4][x].as_(),
916      rec[5][x].as_(),
917      rec[6][x].as_(),
918      rec[7][x].as_(),
919      rec[8][x].as_(),
920      rec[9][x].as_(),
921      rec[10][x].as_(),
922      rec[11][x].as_(),
923      rec[12][x].as_(),
924      rec[13][x].as_(),
925    ];
926    if let Some(data) = deblock_size14_inner(vals, level, bd) {
927      copy_vertical(rec, x, 1, &data);
928    }
929  }
930}
931
932// Assumes rec[0] and src[0] are set 7 taps back from the edge.
933// Accesses fourteen taps, accumulates twelve pixels into the tally
934fn sse_size14<T: Pixel>(
935  rec: &PlaneRegion<'_, T>, src: &PlaneRegion<'_, T>,
936  tally: &mut [i64; MAX_LOOP_FILTER + 2], horizontal_p: bool, bd: usize,
937) {
938  let flat = 1 << (bd - 8);
939  for i in 0..4 {
940    let (p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, a) =
941      if horizontal_p {
942        // 14 taps
943        (
944          rec[0][i].as_(),
945          rec[1][i].as_(),
946          rec[2][i].as_(),
947          rec[3][i].as_(),
948          rec[4][i].as_(),
949          rec[5][i].as_(),
950          rec[6][i].as_(),
951          rec[7][i].as_(),
952          rec[8][i].as_(),
953          rec[9][i].as_(),
954          rec[10][i].as_(),
955          rec[11][i].as_(),
956          rec[12][i].as_(),
957          rec[13][i].as_(),
958          // 12 pixels to compare so offset one forward
959          [
960            src[1][i].as_(),
961            src[2][i].as_(),
962            src[3][i].as_(),
963            src[4][i].as_(),
964            src[5][i].as_(),
965            src[6][i].as_(),
966            src[7][i].as_(),
967            src[8][i].as_(),
968            src[9][i].as_(),
969            src[10][i].as_(),
970            src[11][i].as_(),
971            src[12][i].as_(),
972          ],
973        )
974      } else {
975        // 14 taps
976        (
977          rec[i][0].as_(),
978          rec[i][1].as_(),
979          rec[i][2].as_(),
980          rec[i][3].as_(),
981          rec[i][4].as_(),
982          rec[i][5].as_(),
983          rec[i][6].as_(),
984          rec[i][7].as_(),
985          rec[i][8].as_(),
986          rec[i][9].as_(),
987          rec[i][10].as_(),
988          rec[i][11].as_(),
989          rec[i][12].as_(),
990          rec[i][13].as_(),
991          // 12 pixels to compare so offset one forward
992          [
993            src[i][1].as_(),
994            src[i][2].as_(),
995            src[i][3].as_(),
996            src[i][4].as_(),
997            src[i][5].as_(),
998            src[i][6].as_(),
999            src[i][7].as_(),
1000            src[i][8].as_(),
1001            src[i][9].as_(),
1002            src[i][10].as_(),
1003            src[i][11].as_(),
1004            src[i][12].as_(),
1005          ],
1006        )
1007      };
1008
1009    // Five possibilities: no filter, wide14, wide8, narrow2 and narrow4
1010    let none: [i32; 12] = [p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5];
1011    let wide14 =
1012      filter_wide14_12(p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6);
1013    let wide8 =
1014      filter_wide8_12(p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5);
1015    let narrow2 = filter_narrow2_12(
1016      p5,
1017      p4,
1018      p3,
1019      p2,
1020      p1,
1021      p0,
1022      q0,
1023      q1,
1024      q2,
1025      q3,
1026      q4,
1027      q5,
1028      bd - 8,
1029    );
1030    let narrow4 = filter_narrow4_12(
1031      p5,
1032      p4,
1033      p3,
1034      p2,
1035      p1,
1036      p0,
1037      q0,
1038      q1,
1039      q2,
1040      q3,
1041      q4,
1042      q5,
1043      bd - 8,
1044    );
1045
1046    // mask8 sets the dividing line for filter vs no filter
1047    // flat8 decides between wide and narrow filters (unrelated to level)
1048    // flat14 decides between wide14 and wide8 filters
1049    // nhev4 sets the dividing line between narrow2 and narrow4
1050    let mask = clamp(
1051      mask8(p3, p2, p1, p0, q0, q1, q2, q3, bd - 8),
1052      1,
1053      MAX_LOOP_FILTER + 1,
1054    );
1055    let flat8p = flat8(p3, p2, p1, p0, q0, q1, q2, q3) <= flat;
1056    let flat14p = flat14_outer(p6, p5, p4, p0, q0, q4, q5, q6) <= flat;
1057    let nhev = clamp(nhev4(p1, p0, q0, q1, bd - 8), mask, MAX_LOOP_FILTER + 1);
1058
1059    // sse for each; short-circuit the 'special' no-op cases.
1060    let sse_none = stride_sse(&a, &none);
1061    let sse_wide8 = if flat8p && !flat14p && mask <= MAX_LOOP_FILTER {
1062      stride_sse(&a, &wide8)
1063    } else {
1064      sse_none
1065    };
1066    let sse_wide14 = if flat8p && flat14p && mask <= MAX_LOOP_FILTER {
1067      stride_sse(&a, &wide14)
1068    } else {
1069      sse_none
1070    };
1071    let sse_narrow2 = if !flat8p && nhev != mask {
1072      stride_sse(&a, &narrow2)
1073    } else {
1074      sse_none
1075    };
1076    let sse_narrow4 = if !flat8p && nhev <= MAX_LOOP_FILTER {
1077      stride_sse(&a, &narrow4)
1078    } else {
1079      sse_none
1080    };
1081
1082    // accumulate possible filter values into the tally
1083    tally[0] += sse_none;
1084    tally[mask] -= sse_none;
1085    if flat8p {
1086      if flat14p {
1087        tally[mask] += sse_wide14;
1088      } else {
1089        tally[mask] += sse_wide8;
1090      }
1091    } else {
1092      tally[mask] += sse_narrow2;
1093      tally[nhev] -= sse_narrow2;
1094      tally[nhev] += sse_narrow4;
1095    }
1096  }
1097}
1098
1099fn filter_v_edge<T: Pixel>(
1100  deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset,
1101  p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize,
1102) {
1103  let block = &blocks[bo];
1104  let txsize = if pli == 0 {
1105    block.txsize
1106  } else {
1107    block.bsize.largest_chroma_tx_size(xdec, ydec)
1108  };
1109  let tx_edge = bo.0.x >> xdec & (txsize.width_mi() - 1) == 0;
1110  if tx_edge {
1111    let prev_block = deblock_left(blocks, bo, &p.as_const());
1112    let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0;
1113    let filter_size =
1114      deblock_size(block, prev_block, &p.as_const(), pli, true, block_edge);
1115    if filter_size > 0 {
1116      let level = deblock_level(deblock, block, prev_block, pli, true);
1117      if level > 0 {
1118        let po = bo.plane_offset(p.plane_cfg);
1119        let mut plane_region = p.subregion_mut(Area::Rect {
1120          x: po.x - (filter_size >> 1) as isize,
1121          y: po.y,
1122          width: filter_size,
1123          height: 4,
1124        });
1125        match filter_size {
1126          4 => {
1127            deblock_v_size4(&mut plane_region, level, bd);
1128          }
1129          6 => {
1130            deblock_v_size6(&mut plane_region, level, bd);
1131          }
1132          8 => {
1133            deblock_v_size8(&mut plane_region, level, bd);
1134          }
1135          14 => {
1136            deblock_v_size14(&mut plane_region, level, bd);
1137          }
1138          _ => unreachable!(),
1139        }
1140      }
1141    }
1142  }
1143}
1144
1145fn sse_v_edge<T: Pixel>(
1146  blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>,
1147  src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2],
1148  pli: usize, bd: usize, xdec: usize, ydec: usize,
1149) {
1150  let block = &blocks[bo];
1151  let txsize = if pli == 0 {
1152    block.txsize
1153  } else {
1154    block.bsize.largest_chroma_tx_size(xdec, ydec)
1155  };
1156  let tx_edge = bo.0.x >> xdec & (txsize.width_mi() - 1) == 0;
1157  if tx_edge {
1158    let prev_block = deblock_left(blocks, bo, rec_plane);
1159    let block_edge = bo.0.x & (block.n4_w as usize - 1) == 0;
1160    let filter_size =
1161      deblock_size(block, prev_block, rec_plane, pli, true, block_edge);
1162    if filter_size > 0 {
1163      let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling
1164      let rec_region = rec_plane.subregion(Area::Rect {
1165        x: po.x - (filter_size >> 1) as isize,
1166        y: po.y,
1167        width: filter_size,
1168        height: 4,
1169      });
1170      let src_region = src_plane.subregion(Area::Rect {
1171        x: po.x - (filter_size >> 1) as isize,
1172        y: po.y,
1173        width: filter_size,
1174        height: 4,
1175      });
1176      match filter_size {
1177        4 => {
1178          sse_size4(&rec_region, &src_region, tally, false, bd);
1179        }
1180        6 => {
1181          sse_size6(&rec_region, &src_region, tally, false, bd);
1182        }
1183        8 => {
1184          sse_size8(&rec_region, &src_region, tally, false, bd);
1185        }
1186        14 => {
1187          sse_size14(&rec_region, &src_region, tally, false, bd);
1188        }
1189        _ => unreachable!(),
1190      }
1191    }
1192  }
1193}
1194
1195fn filter_h_edge<T: Pixel>(
1196  deblock: &DeblockState, blocks: &TileBlocks, bo: TileBlockOffset,
1197  p: &mut PlaneRegionMut<T>, pli: usize, bd: usize, xdec: usize, ydec: usize,
1198) {
1199  let block = &blocks[bo];
1200  let txsize = if pli == 0 {
1201    block.txsize
1202  } else {
1203    block.bsize.largest_chroma_tx_size(xdec, ydec)
1204  };
1205  let tx_edge = bo.0.y >> ydec & (txsize.height_mi() - 1) == 0;
1206  if tx_edge {
1207    let prev_block = deblock_up(blocks, bo, &p.as_const());
1208    let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0;
1209    let filter_size =
1210      deblock_size(block, prev_block, &p.as_const(), pli, false, block_edge);
1211    if filter_size > 0 {
1212      let level = deblock_level(deblock, block, prev_block, pli, false);
1213      if level > 0 {
1214        let po = bo.plane_offset(p.plane_cfg);
1215        let mut plane_region = p.subregion_mut(Area::Rect {
1216          x: po.x,
1217          y: po.y - (filter_size >> 1) as isize,
1218          width: 4,
1219          height: filter_size,
1220        });
1221        match filter_size {
1222          4 => {
1223            deblock_h_size4(&mut plane_region, level, bd);
1224          }
1225          6 => {
1226            deblock_h_size6(&mut plane_region, level, bd);
1227          }
1228          8 => {
1229            deblock_h_size8(&mut plane_region, level, bd);
1230          }
1231          14 => {
1232            deblock_h_size14(&mut plane_region, level, bd);
1233          }
1234          _ => unreachable!(),
1235        }
1236      }
1237    }
1238  }
1239}
1240
1241fn sse_h_edge<T: Pixel>(
1242  blocks: &TileBlocks, bo: TileBlockOffset, rec_plane: &PlaneRegion<T>,
1243  src_plane: &PlaneRegion<T>, tally: &mut [i64; MAX_LOOP_FILTER + 2],
1244  pli: usize, bd: usize, xdec: usize, ydec: usize,
1245) {
1246  let block = &blocks[bo];
1247  let txsize = if pli == 0 {
1248    block.txsize
1249  } else {
1250    block.bsize.largest_chroma_tx_size(xdec, ydec)
1251  };
1252  let tx_edge = bo.0.y >> ydec & (txsize.height_mi() - 1) == 0;
1253  if tx_edge {
1254    let prev_block = deblock_up(blocks, bo, rec_plane);
1255    let block_edge = bo.0.y & (block.n4_h as usize - 1) == 0;
1256    let filter_size =
1257      deblock_size(block, prev_block, rec_plane, pli, true, block_edge);
1258    if filter_size > 0 {
1259      let po = bo.plane_offset(rec_plane.plane_cfg); // rec and src have identical subsampling
1260      let rec_region = rec_plane.subregion(Area::Rect {
1261        x: po.x,
1262        y: po.y - (filter_size >> 1) as isize,
1263        width: 4,
1264        height: filter_size,
1265      });
1266      let src_region = src_plane.subregion(Area::Rect {
1267        x: po.x,
1268        y: po.y - (filter_size >> 1) as isize,
1269        width: 4,
1270        height: filter_size,
1271      });
1272
1273      match filter_size {
1274        4 => {
1275          sse_size4(&rec_region, &src_region, tally, true, bd);
1276        }
1277        6 => {
1278          sse_size6(&rec_region, &src_region, tally, true, bd);
1279        }
1280        8 => {
1281          sse_size8(&rec_region, &src_region, tally, true, bd);
1282        }
1283        14 => {
1284          sse_size14(&rec_region, &src_region, tally, true, bd);
1285        }
1286        _ => unreachable!(),
1287      }
1288    }
1289  }
1290}
1291
1292// Deblocks all edges, vertical and horizontal, in a single plane
1293#[profiling::function]
1294pub fn deblock_plane<T: Pixel>(
1295  deblock: &DeblockState, p: &mut PlaneRegionMut<T>, pli: usize,
1296  blocks: &TileBlocks, crop_w: usize, crop_h: usize, bd: usize,
1297) {
1298  let xdec = p.plane_cfg.xdec;
1299  let ydec = p.plane_cfg.ydec;
1300  assert!(xdec <= 1 && ydec <= 1);
1301
1302  match pli {
1303    0 => {
1304      if deblock.levels[0] == 0 && deblock.levels[1] == 0 {
1305        return;
1306      }
1307    }
1308    1 => {
1309      if deblock.levels[2] == 0 {
1310        return;
1311      }
1312    }
1313    2 => {
1314      if deblock.levels[3] == 0 {
1315        return;
1316      }
1317    }
1318    _ => return,
1319  }
1320
1321  let rect = p.rect();
1322  let cols = (cmp::min(
1323    blocks.cols(),
1324    ((crop_w - rect.x as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2,
1325  ) + (1 << xdec >> 1))
1326    >> xdec
1327    << xdec; // Clippy can go suck an egg
1328  let rows = (cmp::min(
1329    blocks.rows(),
1330    ((crop_h - rect.y as usize) + MI_SIZE - 1) >> MI_SIZE_LOG2,
1331  ) + (1 << ydec >> 1))
1332    >> ydec
1333    << ydec; // Clippy can go suck an egg
1334
1335  // vertical edge filtering leads horizontal by one full MI-sized
1336  // row (and horizontal filtering doesn't happen along the upper
1337  // edge).  Unroll to avoid corner-cases.
1338  if rows > 0 {
1339    for x in (1 << xdec..cols).step_by(1 << xdec) {
1340      filter_v_edge(
1341        deblock,
1342        blocks,
1343        TileBlockOffset(BlockOffset { x, y: 0 }),
1344        p,
1345        pli,
1346        bd,
1347        xdec,
1348        ydec,
1349      );
1350    }
1351    if rows > 1 << ydec {
1352      for x in (1 << xdec..cols).step_by(1 << xdec) {
1353        filter_v_edge(
1354          deblock,
1355          blocks,
1356          TileBlockOffset(BlockOffset { x, y: 1 << ydec }),
1357          p,
1358          pli,
1359          bd,
1360          xdec,
1361          ydec,
1362        );
1363      }
1364    }
1365  }
1366
1367  // filter rows where vertical and horizontal edge filtering both
1368  // happen (horizontal edge filtering lags vertical by one row).
1369  for y in ((2 << ydec)..rows).step_by(1 << ydec) {
1370    // Check for vertical edge at first MI block boundary on this row
1371    if cols > 1 << xdec {
1372      filter_v_edge(
1373        deblock,
1374        blocks,
1375        TileBlockOffset(BlockOffset { x: 1 << xdec, y }),
1376        p,
1377        pli,
1378        bd,
1379        xdec,
1380        ydec,
1381      );
1382    }
1383    // run the rest of the row with both vertical and horizontal edge filtering.
1384    // Horizontal lags vertical edge by one row and two columns.
1385    for x in (2 << xdec..cols).step_by(1 << xdec) {
1386      filter_v_edge(
1387        deblock,
1388        blocks,
1389        TileBlockOffset(BlockOffset { x, y }),
1390        p,
1391        pli,
1392        bd,
1393        xdec,
1394        ydec,
1395      );
1396      filter_h_edge(
1397        deblock,
1398        blocks,
1399        TileBlockOffset(BlockOffset {
1400          x: x - (2 << xdec),
1401          y: y - (1 << ydec),
1402        }),
1403        p,
1404        pli,
1405        bd,
1406        xdec,
1407        ydec,
1408      );
1409    }
1410    // ..and the last two horizontal edges for the row
1411    if cols >= 2 << xdec {
1412      filter_h_edge(
1413        deblock,
1414        blocks,
1415        TileBlockOffset(BlockOffset {
1416          x: cols - (2 << xdec),
1417          y: y - (1 << ydec),
1418        }),
1419        p,
1420        pli,
1421        bd,
1422        xdec,
1423        ydec,
1424      );
1425    }
1426    if cols >= 1 << xdec {
1427      filter_h_edge(
1428        deblock,
1429        blocks,
1430        TileBlockOffset(BlockOffset {
1431          x: cols - (1 << xdec),
1432          y: y - (1 << ydec),
1433        }),
1434        p,
1435        pli,
1436        bd,
1437        xdec,
1438        ydec,
1439      );
1440    }
1441  }
1442
1443  // Last horizontal row, vertical is already complete
1444  if rows > 1 << ydec {
1445    for x in (0..cols).step_by(1 << xdec) {
1446      filter_h_edge(
1447        deblock,
1448        blocks,
1449        TileBlockOffset(BlockOffset { x, y: rows - (1 << ydec) }),
1450        p,
1451        pli,
1452        bd,
1453        xdec,
1454        ydec,
1455      );
1456    }
1457  }
1458}
1459
1460// sse count of all edges in a single plane, accumulates into vertical and horizontal counts
1461fn sse_plane<T: Pixel>(
1462  rec: &PlaneRegion<T>, src: &PlaneRegion<T>,
1463  v_sse: &mut [i64; MAX_LOOP_FILTER + 2],
1464  h_sse: &mut [i64; MAX_LOOP_FILTER + 2], pli: usize, blocks: &TileBlocks,
1465  crop_w: usize, crop_h: usize, bd: usize,
1466) {
1467  let xdec = rec.plane_cfg.xdec;
1468  let ydec = rec.plane_cfg.ydec;
1469  assert!(xdec <= 1 && ydec <= 1);
1470  let rect = rec.rect();
1471  let cols = (cmp::min(
1472    blocks.cols(),
1473    (crop_w - rect.x as usize + MI_SIZE - 1) >> MI_SIZE_LOG2,
1474  ) + (1 << xdec >> 1))
1475    >> xdec
1476    << xdec; // Clippy can go suck an egg
1477  let rows = (cmp::min(
1478    blocks.rows(),
1479    (crop_h - rect.y as usize + MI_SIZE - 1) >> MI_SIZE_LOG2,
1480  ) + (1 << ydec >> 1))
1481    >> ydec
1482    << ydec; // Clippy can go suck an egg
1483
1484  // No horizontal edge filtering along top of frame
1485  for x in (1 << xdec..cols).step_by(1 << xdec) {
1486    sse_v_edge(
1487      blocks,
1488      TileBlockOffset(BlockOffset { x, y: 0 }),
1489      rec,
1490      src,
1491      v_sse,
1492      pli,
1493      bd,
1494      xdec,
1495      ydec,
1496    );
1497  }
1498
1499  // Unlike actual filtering, we're counting horizontal and vertical
1500  // as separable cases.  No need to lag the horizontal processing
1501  // behind vertical.
1502  for y in (1 << ydec..rows).step_by(1 << ydec) {
1503    // No vertical filtering along left edge of frame
1504    sse_h_edge(
1505      blocks,
1506      TileBlockOffset(BlockOffset { x: 0, y }),
1507      rec,
1508      src,
1509      h_sse,
1510      pli,
1511      bd,
1512      xdec,
1513      ydec,
1514    );
1515    for x in (1 << xdec..cols).step_by(1 << xdec) {
1516      sse_v_edge(
1517        blocks,
1518        TileBlockOffset(BlockOffset { x, y }),
1519        rec,
1520        src,
1521        v_sse,
1522        pli,
1523        bd,
1524        xdec,
1525        ydec,
1526      );
1527      sse_h_edge(
1528        blocks,
1529        TileBlockOffset(BlockOffset { x, y }),
1530        rec,
1531        src,
1532        h_sse,
1533        pli,
1534        bd,
1535        xdec,
1536        ydec,
1537      );
1538    }
1539  }
1540}
1541
1542// Deblocks all edges in all planes of a frame
1543#[profiling::function]
1544pub fn deblock_filter_frame<T: Pixel>(
1545  deblock: &DeblockState, tile: &mut TileMut<T>, blocks: &TileBlocks,
1546  crop_w: usize, crop_h: usize, bd: usize, planes: usize,
1547) {
1548  tile.planes[..planes].par_iter_mut().enumerate().for_each(|(pli, plane)| {
1549    deblock_plane(deblock, plane, pli, blocks, crop_w, crop_h, bd);
1550  });
1551}
1552
1553fn sse_optimize<T: Pixel>(
1554  rec: &Tile<T>, input: &Tile<T>, blocks: &TileBlocks, crop_w: usize,
1555  crop_h: usize, bd: usize, monochrome: bool,
1556) -> [u8; 4] {
1557  // i64 allows us to accumulate a total of ~ 35 bits worth of pixels
1558  assert!(
1559    ILog::ilog(input.planes[0].plane_cfg.width)
1560      + ILog::ilog(input.planes[0].plane_cfg.height)
1561      < 35
1562  );
1563  let mut level = [0; 4];
1564  let planes = if monochrome { 1 } else { MAX_PLANES };
1565
1566  for pli in 0..planes {
1567    let mut v_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2];
1568    let mut h_tally: [i64; MAX_LOOP_FILTER + 2] = [0; MAX_LOOP_FILTER + 2];
1569
1570    sse_plane(
1571      &rec.planes[pli],
1572      &input.planes[pli],
1573      &mut v_tally,
1574      &mut h_tally,
1575      pli,
1576      blocks,
1577      crop_w,
1578      crop_h,
1579      bd,
1580    );
1581
1582    for i in 1..=MAX_LOOP_FILTER {
1583      v_tally[i] += v_tally[i - 1];
1584      h_tally[i] += h_tally[i - 1];
1585    }
1586
1587    match pli {
1588      0 => {
1589        let mut best_v = 999;
1590        let mut best_h = 999;
1591        for i in 0..=MAX_LOOP_FILTER {
1592          if best_v == 999 || v_tally[best_v] > v_tally[i] {
1593            best_v = i;
1594          };
1595          if best_h == 999 || h_tally[best_h] > h_tally[i] {
1596            best_h = i;
1597          };
1598        }
1599        level[0] = best_v as u8;
1600        level[1] = best_h as u8;
1601      }
1602      1 | 2 => {
1603        let mut best = 999;
1604        for i in 0..=MAX_LOOP_FILTER {
1605          if best == 999
1606            || v_tally[best] + h_tally[best] > v_tally[i] + h_tally[i]
1607          {
1608            best = i;
1609          };
1610        }
1611        level[pli + 1] = best as u8;
1612      }
1613      _ => unreachable!(),
1614    }
1615  }
1616  level
1617}
1618
1619#[profiling::function]
1620pub fn deblock_filter_optimize<T: Pixel, U: Pixel>(
1621  fi: &FrameInvariants<T>, rec: &Tile<U>, input: &Tile<U>,
1622  blocks: &TileBlocks, crop_w: usize, crop_h: usize,
1623) -> [u8; 4] {
1624  if fi.config.speed_settings.fast_deblock {
1625    let q = ac_q(fi.base_q_idx, 0, fi.sequence.bit_depth).get() as i32;
1626    let level = clamp(
1627      match fi.sequence.bit_depth {
1628        8 => {
1629          if fi.frame_type == FrameType::KEY {
1630            (q * 17563 - 421_574 + (1 << 18 >> 1)) >> 18
1631          } else {
1632            (q * 6017 + 650_707 + (1 << 18 >> 1)) >> 18
1633          }
1634        }
1635        10 => {
1636          if fi.frame_type == FrameType::KEY {
1637            ((q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20) - 4
1638          } else {
1639            (q * 20723 + 4_060_632 + (1 << 20 >> 1)) >> 20
1640          }
1641        }
1642        12 => {
1643          if fi.frame_type == FrameType::KEY {
1644            ((q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22) - 4
1645          } else {
1646            (q * 20723 + 16_242_526 + (1 << 22 >> 1)) >> 22
1647          }
1648        }
1649        _ => unreachable!(),
1650      },
1651      0,
1652      MAX_LOOP_FILTER as i32,
1653    ) as u8;
1654    [level; 4]
1655  } else {
1656    // Deblocking happens in 4x4 (luma) units; luma x,y are clipped to
1657    // the *crop frame* of the entire frame by 4x4 block.
1658    sse_optimize(
1659      rec,
1660      input,
1661      blocks,
1662      crop_w,
1663      crop_h,
1664      fi.sequence.bit_depth,
1665      fi.sequence.chroma_sampling == Cs400,
1666    )
1667  }
1668}