1cfg_if::cfg_if! {
11  if #[cfg(nasm_x86_64)] {
12    use crate::asm::x86::lrf::*;
13  } else {
14    use self::rust::*;
15  }
16}
17
18use crate::api::SGRComplexityLevel;
19use crate::color::ChromaSampling::Cs400;
20use crate::context::{MAX_PLANES, SB_SIZE};
21use crate::encoder::FrameInvariants;
22use crate::frame::{
23  AsRegion, Frame, Plane, PlaneConfig, PlaneOffset, PlaneSlice,
24};
25use crate::tiling::{Area, PlaneRegion, PlaneRegionMut, Rect};
26use crate::util::{clamp, CastFromPrimitive, ILog, Pixel};
27use std::cmp;
28use std::iter::FusedIterator;
29use std::ops::{Index, IndexMut};
30
31pub const RESTORATION_TILESIZE_MAX_LOG2: usize = 8;
32
33pub const RESTORE_NONE: u8 = 0;
34pub const RESTORE_SWITCHABLE: u8 = 1;
35pub const RESTORE_WIENER: u8 = 2;
36pub const RESTORE_SGRPROJ: u8 = 3;
37
38pub const WIENER_TAPS_MIN: [i8; 3] = [-5, -23, -17];
39pub const WIENER_TAPS_MID: [i8; 3] = [3, -7, 15];
40pub const WIENER_TAPS_MAX: [i8; 3] = [10, 8, 46];
41#[allow(unused)]
42pub const WIENER_TAPS_K: [i8; 3] = [1, 2, 3];
43pub const WIENER_BITS: usize = 7;
44
45pub const SGRPROJ_XQD_MIN: [i8; 2] = [-96, -32];
46pub const SGRPROJ_XQD_MID: [i8; 2] = [-32, 31];
47pub const SGRPROJ_XQD_MAX: [i8; 2] = [31, 95];
48pub const SGRPROJ_PRJ_SUBEXP_K: u8 = 4;
49pub const SGRPROJ_PRJ_BITS: u8 = 7;
50pub const SGRPROJ_PARAMS_BITS: u8 = 4;
51pub const SGRPROJ_MTABLE_BITS: u8 = 20;
52pub const SGRPROJ_SGR_BITS: u8 = 8;
53pub const SGRPROJ_RECIP_BITS: u8 = 12;
54pub const SGRPROJ_RST_BITS: u8 = 4;
55pub const SGRPROJ_PARAMS_S: [[u32; 2]; 1 << SGRPROJ_PARAMS_BITS] = [
56  [140, 3236],
57  [112, 2158],
58  [93, 1618],
59  [80, 1438],
60  [70, 1295],
61  [58, 1177],
62  [47, 1079],
63  [37, 996],
64  [30, 925],
65  [25, 863],
66  [0, 2589],
67  [0, 1618],
68  [0, 1177],
69  [0, 925],
70  [56, 0],
71  [22, 0],
72];
73
74const SGRPROJ_ALL_SETS: &[u8] =
77  &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
78const SGRPROJ_REDUCED_SETS: &[u8] = &[1, 3, 5, 7, 9, 11, 13, 15];
86
87pub const fn get_sgr_sets(complexity: SGRComplexityLevel) -> &'static [u8] {
88  match complexity {
89    SGRComplexityLevel::Full => SGRPROJ_ALL_SETS,
90    SGRComplexityLevel::Reduced => SGRPROJ_REDUCED_SETS,
91  }
92}
93
94pub const SOLVE_IMAGE_MAX: usize = 1 << RESTORATION_TILESIZE_MAX_LOG2;
95pub const SOLVE_IMAGE_STRIDE: usize = SOLVE_IMAGE_MAX + 6 + 2;
96pub const SOLVE_IMAGE_HEIGHT: usize = SOLVE_IMAGE_STRIDE;
97pub const SOLVE_IMAGE_SIZE: usize = SOLVE_IMAGE_STRIDE * SOLVE_IMAGE_HEIGHT;
98
99pub const STRIPE_IMAGE_MAX: usize = (1 << RESTORATION_TILESIZE_MAX_LOG2)
100  + (1 << (RESTORATION_TILESIZE_MAX_LOG2 - 1));
101pub const STRIPE_IMAGE_STRIDE: usize = STRIPE_IMAGE_MAX + 6 + 2;
102pub const STRIPE_IMAGE_HEIGHT: usize = 64 + 6 + 2;
103pub const STRIPE_IMAGE_SIZE: usize = STRIPE_IMAGE_STRIDE * STRIPE_IMAGE_HEIGHT;
104
105pub const IMAGE_WIDTH_MAX: usize = [STRIPE_IMAGE_MAX, SOLVE_IMAGE_MAX]
106  [(STRIPE_IMAGE_MAX < SOLVE_IMAGE_MAX) as usize];
107
108#[derive(Debug)]
110pub struct IntegralImageBuffer {
111  pub integral_image: Vec<u32>,
112  pub sq_integral_image: Vec<u32>,
113}
114
115impl IntegralImageBuffer {
116  #[inline]
118  pub fn zeroed(size: usize) -> Self {
119    Self { integral_image: vec![0; size], sq_integral_image: vec![0; size] }
120  }
121}
122
123#[allow(unused)] #[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
125pub enum RestorationFilter {
126  #[default]
127  None,
128  Wiener {
129    coeffs: [[i8; 3]; 2],
130  },
131  Sgrproj {
132    set: u8,
133    xqd: [i8; 2],
134  },
135}
136
137impl RestorationFilter {
138  pub const fn notequal(self, cmp: RestorationFilter) -> bool {
139    match self {
140      RestorationFilter::None {} => !matches!(cmp, RestorationFilter::None {}),
141      RestorationFilter::Sgrproj { set, xqd } => {
142        if let RestorationFilter::Sgrproj { set: set2, xqd: xqd2 } = cmp {
143          !(set == set2 && xqd[0] == xqd2[0] && xqd[1] == xqd2[1])
144        } else {
145          true
146        }
147      }
148      RestorationFilter::Wiener { coeffs } => {
149        if let RestorationFilter::Wiener { coeffs: coeffs2 } = cmp {
150          !(coeffs[0][0] == coeffs2[0][0]
151            && coeffs[0][1] == coeffs2[0][1]
152            && coeffs[0][2] == coeffs2[0][2]
153            && coeffs[1][0] == coeffs2[1][0]
154            && coeffs[1][1] == coeffs2[1][1]
155            && coeffs[1][2] == coeffs2[1][2])
156        } else {
157          true
158        }
159      }
160    }
161  }
162}
163
164pub(crate) mod rust {
165  use crate::cpu_features::CpuFeatureLevel;
166  use crate::frame::PlaneSlice;
167  use crate::lrf::{
168    get_integral_square, sgrproj_sum_finish, SGRPROJ_RST_BITS,
169    SGRPROJ_SGR_BITS,
170  };
171  use crate::util::CastFromPrimitive;
172  use crate::Pixel;
173
174  #[inline(always)]
175  pub(crate) fn sgrproj_box_ab_internal<const BD: usize>(
176    r: usize, af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
177    iimg_stride: usize, start_x: usize, y: usize, stripe_w: usize, s: u32,
178  ) {
179    let d: usize = r * 2 + 1;
180    let n: usize = d * d;
181    let one_over_n = if r == 1 { 455 } else { 164 };
182
183    assert!(iimg.len() > (y + d) * iimg_stride + stripe_w + 1 + d);
184    assert!(iimg_sq.len() > (y + d) * iimg_stride + stripe_w + 1 + d);
185    assert!(af.len() > stripe_w + 1);
186    assert!(bf.len() > stripe_w + 1);
187
188    for x in start_x..stripe_w + 2 {
189      unsafe {
191        let sum = get_integral_square(iimg, iimg_stride, x, y, d);
192        let ssq = get_integral_square(iimg_sq, iimg_stride, x, y, d);
193        let (reta, retb) =
194          sgrproj_sum_finish::<BD>(ssq, sum, n as u32, one_over_n, s);
195        *af.get_unchecked_mut(x) = reta;
196        *bf.get_unchecked_mut(x) = retb;
197      }
198    }
199  }
200
201  pub(crate) fn sgrproj_box_ab_r1<const BD: usize>(
203    af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
204    iimg_stride: usize, y: usize, stripe_w: usize, s: u32,
205    _cpu: CpuFeatureLevel,
206  ) {
207    sgrproj_box_ab_internal::<BD>(
208      1,
209      af,
210      bf,
211      iimg,
212      iimg_sq,
213      iimg_stride,
214      0,
215      y,
216      stripe_w,
217      s,
218    );
219  }
220
221  pub(crate) fn sgrproj_box_ab_r2<const BD: usize>(
223    af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
224    iimg_stride: usize, y: usize, stripe_w: usize, s: u32,
225    _cpu: CpuFeatureLevel,
226  ) {
227    sgrproj_box_ab_internal::<BD>(
228      2,
229      af,
230      bf,
231      iimg,
232      iimg_sq,
233      iimg_stride,
234      0,
235      y,
236      stripe_w,
237      s,
238    );
239  }
240
241  pub(crate) fn sgrproj_box_f_r0<T: Pixel>(
242    f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneSlice<T>,
243    _cpu: CpuFeatureLevel,
244  ) {
245    sgrproj_box_f_r0_internal(f, 0, y, w, cdeffed);
246  }
247
248  #[inline(always)]
249  pub(crate) fn sgrproj_box_f_r0_internal<T: Pixel>(
250    f: &mut [u32], start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice<T>,
251  ) {
252    let line = cdeffed.row(y);
253    for (fp, &v) in f[start_x..w].iter_mut().zip(line[start_x..w].iter()) {
254      *fp = u32::cast_from(v) << SGRPROJ_RST_BITS;
255    }
256  }
257
258  pub(crate) fn sgrproj_box_f_r1<T: Pixel>(
259    af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], y: usize, w: usize,
260    cdeffed: &PlaneSlice<T>, _cpu: CpuFeatureLevel,
261  ) {
262    sgrproj_box_f_r1_internal(af, bf, f, 0, y, w, cdeffed);
263  }
264
265  #[inline(always)]
266  pub(crate) fn sgrproj_box_f_r1_internal<T: Pixel>(
267    af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], start_x: usize,
268    y: usize, w: usize, cdeffed: &PlaneSlice<T>,
269  ) {
270    let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
271    let line = cdeffed.row(y);
272    for x in start_x..w {
273      let a = 3 * (af[0][x] + af[2][x] + af[0][x + 2] + af[2][x + 2])
274        + 4
275          * (af[1][x]
276            + af[0][x + 1]
277            + af[1][x + 1]
278            + af[2][x + 1]
279            + af[1][x + 2]);
280      let b = 3 * (bf[0][x] + bf[2][x] + bf[0][x + 2] + bf[2][x + 2])
281        + 4
282          * (bf[1][x]
283            + bf[0][x + 1]
284            + bf[1][x + 1]
285            + bf[2][x + 1]
286            + bf[1][x + 2]);
287      let v = a * u32::cast_from(line[x]) + b;
288      f[x] = (v + (1 << shift >> 1)) >> shift;
289    }
290  }
291
292  pub(crate) fn sgrproj_box_f_r2<T: Pixel>(
293    af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32],
294    y: usize, w: usize, cdeffed: &PlaneSlice<T>, _cpu: CpuFeatureLevel,
295  ) {
296    sgrproj_box_f_r2_internal(af, bf, f0, f1, 0, y, w, cdeffed);
297  }
298
299  #[inline(always)]
300  pub(crate) fn sgrproj_box_f_r2_internal<T: Pixel>(
301    af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32],
302    start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice<T>,
303  ) {
304    let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
305    let shifto = 4 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
306    let line = cdeffed.row(y);
307    let line1 = cdeffed.row(y + 1);
308
309    let af0 = af[0][start_x..w + 3].windows(3);
310    let af1 = af[1][start_x..w + 3].windows(3);
311    let bf0 = bf[0][start_x..w + 3].windows(3);
312    let bf1 = bf[1][start_x..w + 3].windows(3);
313
314    let af_it = af0.zip(af1);
315    let bf_it = bf0.zip(bf1);
316
317    let in0 = line[start_x..w].iter();
318    let in1 = line1[start_x..w].iter();
319
320    let o0 = f0[start_x..w].iter_mut();
321    let o1 = f1[start_x..w].iter_mut();
322
323    let in_iter = in0.zip(in1);
324    let out_iter = o0.zip(o1);
325
326    let io_iter = out_iter.zip(in_iter);
327
328    for (((o0, o1), (&p0, &p1)), ((af_0, af_1), (bf_0, bf_1))) in
329      io_iter.zip(af_it.zip(bf_it))
330    {
331      let a = 5 * (af_0[0] + af_0[2]) + 6 * af_0[1];
332      let b = 5 * (bf_0[0] + bf_0[2]) + 6 * bf_0[1];
333      let ao = 5 * (af_1[0] + af_1[2]) + 6 * af_1[1];
334      let bo = 5 * (bf_1[0] + bf_1[2]) + 6 * bf_1[1];
335      let v = (a + ao) * u32::cast_from(p0) + b + bo;
336      *o0 = (v + (1 << shift >> 1)) >> shift;
337      let vo = ao * u32::cast_from(p1) + bo;
338      *o1 = (vo + (1 << shifto >> 1)) >> shifto;
339    }
340  }
341}
342
343#[inline(always)]
344fn sgrproj_sum_finish<const BD: usize>(
345  ssq: u32, sum: u32, n: u32, one_over_n: u32, s: u32,
346) -> (u32, u32) {
347  let bdm8 = BD - 8;
348  let scaled_ssq = (ssq + (1 << (2 * bdm8) >> 1)) >> (2 * bdm8);
349  let scaled_sum = (sum + (1 << bdm8 >> 1)) >> bdm8;
350  let p = (scaled_ssq * n).saturating_sub(scaled_sum * scaled_sum);
351  let z = (p * s + (1 << SGRPROJ_MTABLE_BITS >> 1)) >> SGRPROJ_MTABLE_BITS;
352  let a = if z >= 255 {
353    256
354  } else if z == 0 {
355    1
356  } else {
357    ((z << SGRPROJ_SGR_BITS) + z / 2) / (z + 1)
358  };
359  let b = ((1 << SGRPROJ_SGR_BITS) - a) * sum * one_over_n;
360  (a, (b + (1 << SGRPROJ_RECIP_BITS >> 1)) >> SGRPROJ_RECIP_BITS)
361}
362
363#[inline(always)]
366unsafe fn get_integral_square(
367  iimg: &[u32], stride: usize, x: usize, y: usize, size: usize,
368) -> u32 {
369  let top_left = *iimg.get_unchecked(y * stride + x);
371  let top_right = *iimg.get_unchecked(y * stride + x + size);
372  let bottom_left = *iimg.get_unchecked((y + size) * stride + x);
373  let bottom_right = *iimg.get_unchecked((y + size) * stride + x + size);
374  top_left
375    .wrapping_add(bottom_right)
376    .wrapping_sub(bottom_left)
377    .wrapping_sub(top_right)
378}
379
380struct VertPaddedIter<'a, T: Pixel> {
381  deblocked: &'a Plane<T>,
383  cdeffed: &'a Plane<T>,
384  x: isize,
386  y: isize,
388  end: isize,
390  stripe_begin: isize,
393  stripe_end: isize,
396  crop: isize,
399}
400
401impl<'a, T: Pixel> VertPaddedIter<'a, T> {
402  fn new(
403    cdeffed: &PlaneSlice<'a, T>, deblocked: &PlaneSlice<'a, T>,
404    stripe_h: usize, crop: usize,
405  ) -> VertPaddedIter<'a, T> {
406    assert_eq!(cdeffed.x, deblocked.x);
410    assert_eq!(cdeffed.y, deblocked.y);
411
412    let r = 2;
414
415    let rows_above = r + 2;
417    let rows_below = 2;
418
419    let crop = crop as isize + deblocked.y;
422    let stripe_end = stripe_h as isize + deblocked.y;
423
424    let y = deblocked.y - rows_above as isize;
427
428    VertPaddedIter {
429      deblocked: deblocked.plane,
430      cdeffed: cdeffed.plane,
431      x: deblocked.x,
432      y,
433      end: (rows_above + stripe_h + rows_below) as isize + y,
434      stripe_begin: deblocked.y,
435      stripe_end,
436      crop,
437    }
438  }
439}
440
441impl<'a, T: Pixel> Iterator for VertPaddedIter<'a, T> {
442  type Item = &'a [T];
443
444  #[inline(always)]
445  fn next(&mut self) -> Option<Self::Item> {
446    if self.end > self.y {
447      let cropped_y = clamp(self.y, 0, self.crop - 1);
450      let ly = clamp(cropped_y, self.stripe_begin - 2, self.stripe_end + 1);
452
453      let src_plane = if ly >= self.stripe_begin && ly < self.stripe_end {
455        self.cdeffed
456      } else {
457        self.deblocked
458      };
459      let range = src_plane.row_range(self.x, ly);
461      self.y += 1;
462      Some(&src_plane.data[range])
463    } else {
464      None
465    }
466  }
467
468  fn size_hint(&self) -> (usize, Option<usize>) {
469    let remaining = self.end - self.y;
470    debug_assert!(remaining >= 0);
471    let remaining = remaining as usize;
472
473    (remaining, Some(remaining))
474  }
475}
476
477impl<T: Pixel> ExactSizeIterator for VertPaddedIter<'_, T> {}
478impl<T: Pixel> FusedIterator for VertPaddedIter<'_, T> {}
479
480struct HorzPaddedIter<'a, T: Pixel> {
481  slice: &'a [T],
483  index: isize,
487  end: usize,
489}
490
491impl<'a, T: Pixel> HorzPaddedIter<'a, T> {
492  fn new(
493    slice: &'a [T], start_index: isize, width: usize,
494  ) -> HorzPaddedIter<'a, T> {
495    HorzPaddedIter {
496      slice,
497      index: start_index,
498      end: (width as isize + start_index) as usize,
499    }
500  }
501}
502
503impl<'a, T: Pixel> Iterator for HorzPaddedIter<'a, T> {
504  type Item = &'a T;
505
506  #[inline(always)]
507  fn next(&mut self) -> Option<Self::Item> {
508    if self.index < self.end as isize {
509      let x = clamp(self.index, 0, self.slice.len() as isize - 1) as usize;
511      self.index += 1;
512      Some(&self.slice[x])
513    } else {
514      None
515    }
516  }
517
518  #[inline(always)]
519  fn size_hint(&self) -> (usize, Option<usize>) {
520    let size: usize = (self.end as isize - self.index) as usize;
521    (size, Some(size))
522  }
523}
524
525impl<T: Pixel> ExactSizeIterator for HorzPaddedIter<'_, T> {}
526impl<T: Pixel> FusedIterator for HorzPaddedIter<'_, T> {}
527
528#[profiling::function]
529pub fn setup_integral_image<T: Pixel>(
530  integral_image_buffer: &mut IntegralImageBuffer,
531  integral_image_stride: usize, crop_w: usize, crop_h: usize, stripe_w: usize,
532  stripe_h: usize, cdeffed: &PlaneSlice<T>, deblocked: &PlaneSlice<T>,
533) {
534  let integral_image = &mut integral_image_buffer.integral_image;
535  let sq_integral_image = &mut integral_image_buffer.sq_integral_image;
536
537  let left_w = 4; let right_w = 3; assert_eq!(cdeffed.x, deblocked.x);
542
543  let left_uniques = if cdeffed.x == 0 { 0 } else { left_w };
545  let right_uniques = right_w.min(crop_w - stripe_w);
546
547  let row_uniques = left_uniques + stripe_w + right_uniques;
549
550  let start_index_x = if cdeffed.x == 0 { -(left_w as isize) } else { 0 };
552
553  let mut rows_iter = VertPaddedIter::new(
554    &cdeffed.go_left(left_uniques),
556    &deblocked.go_left(left_uniques),
557    stripe_h + (stripe_h & 1),
559    crop_h,
560  )
561  .map(|row: &[T]| {
562    HorzPaddedIter::new(
563      &row[..row_uniques],
565      start_index_x,
566      left_w + stripe_w + right_w,
567    )
568  });
569
570  {
572    let mut sum: u32 = 0;
573    let mut sq_sum: u32 = 0;
574    let row = rows_iter.next().unwrap();
576    for (src, (integral, sq_integral)) in
577      row.zip(integral_image.iter_mut().zip(sq_integral_image.iter_mut()))
578    {
579      let current = u32::cast_from(*src);
580
581      sum = sum.wrapping_add(current);
584      *integral = sum;
585      sq_sum = sq_sum.wrapping_add(current * current);
586      *sq_integral = sq_sum;
587    }
588  }
589  let mut integral_slice = &mut integral_image[..];
591  let mut sq_integral_slice = &mut sq_integral_image[..];
592  for row in rows_iter {
593    let mut sum: u32 = 0;
594    let mut sq_sum: u32 = 0;
595
596    let (integral_row_prev, integral_row) =
600      integral_slice.split_at_mut(integral_image_stride);
601    let (sq_integral_row_prev, sq_integral_row) =
602      sq_integral_slice.split_at_mut(integral_image_stride);
603    for (
604      src,
605      ((integral_above, sq_integral_above), (integral, sq_integral)),
606    ) in row.zip(
607      integral_row_prev
608        .iter()
609        .zip(sq_integral_row_prev.iter())
610        .zip(integral_row.iter_mut().zip(sq_integral_row.iter_mut())),
611    ) {
612      let current = u32::cast_from(*src);
613      sum = sum.wrapping_add(current);
616      *integral = sum.wrapping_add(*integral_above);
617      sq_sum = sq_sum.wrapping_add(current * current);
618      *sq_integral = sq_sum.wrapping_add(*sq_integral_above);
619    }
620
621    integral_slice = integral_row;
624    sq_integral_slice = sq_integral_row;
625  }
626}
627
628#[profiling::function]
629pub fn sgrproj_stripe_filter<T: Pixel, U: Pixel>(
630  set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
631  integral_image_buffer: &IntegralImageBuffer, integral_image_stride: usize,
632  cdeffed: &PlaneSlice<U>, out: &mut PlaneRegionMut<U>,
633) {
634  let &Rect { width: stripe_w, height: stripe_h, .. } = out.rect();
635  let mut a_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
636    [[0; IMAGE_WIDTH_MAX + 2]; 2];
637  let mut b_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
638    [[0; IMAGE_WIDTH_MAX + 2]; 2];
639  let mut f_r2_0: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
640  let mut f_r2_1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
641  let mut a_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
642    [[0; IMAGE_WIDTH_MAX + 2]; 3];
643  let mut b_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
644    [[0; IMAGE_WIDTH_MAX + 2]; 3];
645  let mut f_r1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
646
647  let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
648  let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
649
650  let fn_ab_r1 = match fi.sequence.bit_depth {
651    8 => sgrproj_box_ab_r1::<8>,
652    10 => sgrproj_box_ab_r1::<10>,
653    12 => sgrproj_box_ab_r1::<12>,
654    _ => unimplemented!(),
655  };
656  let fn_ab_r2 = match fi.sequence.bit_depth {
657    8 => sgrproj_box_ab_r2::<8>,
658    10 => sgrproj_box_ab_r2::<10>,
659    12 => sgrproj_box_ab_r2::<12>,
660    _ => unimplemented!(),
661  };
662
663  let integral_image = &integral_image_buffer.integral_image;
669  let sq_integral_image = &integral_image_buffer.sq_integral_image;
670  if s_r2 > 0 {
671    fn_ab_r2(
672      &mut a_r2[0],
673      &mut b_r2[0],
674      integral_image,
675      sq_integral_image,
676      integral_image_stride,
677      0,
678      stripe_w,
679      s_r2,
680      fi.cpu_feature_level,
681    );
682  }
683  if s_r1 > 0 {
684    let integral_image_offset = integral_image_stride + 1;
685    fn_ab_r1(
686      &mut a_r1[0],
687      &mut b_r1[0],
688      &integral_image[integral_image_offset..],
689      &sq_integral_image[integral_image_offset..],
690      integral_image_stride,
691      0,
692      stripe_w,
693      s_r1,
694      fi.cpu_feature_level,
695    );
696    fn_ab_r1(
697      &mut a_r1[1],
698      &mut b_r1[1],
699      &integral_image[integral_image_offset..],
700      &sq_integral_image[integral_image_offset..],
701      integral_image_stride,
702      1,
703      stripe_w,
704      s_r1,
705      fi.cpu_feature_level,
706    );
707  }
708
709  for y in (0..stripe_h).step_by(2) {
713    let f_r2_ab: [&[u32]; 2] = if s_r2 > 0 {
715      fn_ab_r2(
716        &mut a_r2[(y / 2 + 1) % 2],
717        &mut b_r2[(y / 2 + 1) % 2],
718        integral_image,
719        sq_integral_image,
720        integral_image_stride,
721        y + 2,
722        stripe_w,
723        s_r2,
724        fi.cpu_feature_level,
725      );
726      let ap0: [&[u32]; 2] = [&a_r2[(y / 2) % 2], &a_r2[(y / 2 + 1) % 2]];
727      let bp0: [&[u32]; 2] = [&b_r2[(y / 2) % 2], &b_r2[(y / 2 + 1) % 2]];
728      sgrproj_box_f_r2(
729        &ap0,
730        &bp0,
731        &mut f_r2_0,
732        &mut f_r2_1,
733        y,
734        stripe_w,
735        cdeffed,
736        fi.cpu_feature_level,
737      );
738      [&f_r2_0, &f_r2_1]
739    } else {
740      sgrproj_box_f_r0(
741        &mut f_r2_0,
742        y,
743        stripe_w,
744        cdeffed,
745        fi.cpu_feature_level,
746      );
747      [&f_r2_0, &f_r2_0]
749    };
750    for dy in 0..(2.min(stripe_h - y)) {
751      let y = y + dy;
752      if s_r1 > 0 {
753        let integral_image_offset = integral_image_stride + 1;
754        fn_ab_r1(
755          &mut a_r1[(y + 2) % 3],
756          &mut b_r1[(y + 2) % 3],
757          &integral_image[integral_image_offset..],
758          &sq_integral_image[integral_image_offset..],
759          integral_image_stride,
760          y + 2,
761          stripe_w,
762          s_r1,
763          fi.cpu_feature_level,
764        );
765        let ap1: [&[u32]; 3] =
766          [&a_r1[y % 3], &a_r1[(y + 1) % 3], &a_r1[(y + 2) % 3]];
767        let bp1: [&[u32]; 3] =
768          [&b_r1[y % 3], &b_r1[(y + 1) % 3], &b_r1[(y + 2) % 3]];
769        sgrproj_box_f_r1(
770          &ap1,
771          &bp1,
772          &mut f_r1,
773          y,
774          stripe_w,
775          cdeffed,
776          fi.cpu_feature_level,
777        );
778      } else {
779        sgrproj_box_f_r0(
780          &mut f_r1,
781          y,
782          stripe_w,
783          cdeffed,
784          fi.cpu_feature_level,
785        );
786      }
787
788      let w0 = xqd[0] as i32;
790      let w1 = xqd[1] as i32;
791      let w2 = (1 << SGRPROJ_PRJ_BITS) - w0 - w1;
792
793      let line = &cdeffed[y];
794
795      #[inline(always)]
796      fn apply_filter<U: Pixel>(
797        out: &mut [U], line: &[U], f_r1: &[u32], f_r2_ab: &[u32],
798        stripe_w: usize, bit_depth: usize, w0: i32, w1: i32, w2: i32,
799      ) {
800        let line_it = line[..stripe_w].iter();
801        let f_r2_ab_it = f_r2_ab[..stripe_w].iter();
802        let f_r1_it = f_r1[..stripe_w].iter();
803        let out_it = out[..stripe_w].iter_mut();
804
805        for ((o, &u), (&f_r2_ab, &f_r1)) in
806          out_it.zip(line_it).zip(f_r2_ab_it.zip(f_r1_it))
807        {
808          let u = i32::cast_from(u) << SGRPROJ_RST_BITS;
809          let v = w0 * f_r2_ab as i32 + w1 * u + w2 * f_r1 as i32;
810          let s = (v + (1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) >> 1))
811            >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS);
812          *o = U::cast_from(clamp(s, 0, (1 << bit_depth) - 1));
813        }
814      }
815
816      apply_filter(
817        &mut out[y],
818        line,
819        &f_r1,
820        f_r2_ab[dy],
821        stripe_w,
822        fi.sequence.bit_depth,
823        w0,
824        w1,
825        w2,
826      );
827    }
828  }
829}
830
831#[profiling::function]
846pub fn sgrproj_solve<T: Pixel>(
847  set: u8, fi: &FrameInvariants<T>,
848  integral_image_buffer: &IntegralImageBuffer, input: &PlaneRegion<'_, T>,
849  cdeffed: &PlaneSlice<T>, cdef_w: usize, cdef_h: usize,
850) -> (i8, i8) {
851  let mut a_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
852    [[0; IMAGE_WIDTH_MAX + 2]; 2];
853  let mut b_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
854    [[0; IMAGE_WIDTH_MAX + 2]; 2];
855  let mut f_r2_0: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
856  let mut f_r2_1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
857  let mut a_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
858    [[0; IMAGE_WIDTH_MAX + 2]; 3];
859  let mut b_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
860    [[0; IMAGE_WIDTH_MAX + 2]; 3];
861  let mut f_r1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
862
863  let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
864  let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
865
866  let mut h: [[f64; 2]; 2] = [[0., 0.], [0., 0.]];
867  let mut c: [f64; 2] = [0., 0.];
868
869  let fn_ab_r1 = match fi.sequence.bit_depth {
870    8 => sgrproj_box_ab_r1::<8>,
871    10 => sgrproj_box_ab_r1::<10>,
872    12 => sgrproj_box_ab_r1::<12>,
873    _ => unimplemented!(),
874  };
875  let fn_ab_r2 = match fi.sequence.bit_depth {
876    8 => sgrproj_box_ab_r2::<8>,
877    10 => sgrproj_box_ab_r2::<10>,
878    12 => sgrproj_box_ab_r2::<12>,
879    _ => unimplemented!(),
880  };
881
882  let integral_image = &integral_image_buffer.integral_image;
888  let sq_integral_image = &integral_image_buffer.sq_integral_image;
889  if s_r2 > 0 {
890    fn_ab_r2(
891      &mut a_r2[0],
892      &mut b_r2[0],
893      integral_image,
894      sq_integral_image,
895      SOLVE_IMAGE_STRIDE,
896      0,
897      cdef_w,
898      s_r2,
899      fi.cpu_feature_level,
900    );
901  }
902  if s_r1 > 0 {
903    let integral_image_offset = SOLVE_IMAGE_STRIDE + 1;
904    fn_ab_r1(
905      &mut a_r1[0],
906      &mut b_r1[0],
907      &integral_image[integral_image_offset..],
908      &sq_integral_image[integral_image_offset..],
909      SOLVE_IMAGE_STRIDE,
910      0,
911      cdef_w,
912      s_r1,
913      fi.cpu_feature_level,
914    );
915    fn_ab_r1(
916      &mut a_r1[1],
917      &mut b_r1[1],
918      &integral_image[integral_image_offset..],
919      &sq_integral_image[integral_image_offset..],
920      SOLVE_IMAGE_STRIDE,
921      1,
922      cdef_w,
923      s_r1,
924      fi.cpu_feature_level,
925    );
926  }
927
928  for y in (0..cdef_h).step_by(2) {
932    let f_r2_01: [&[u32]; 2] = if s_r2 > 0 {
934      fn_ab_r2(
935        &mut a_r2[(y / 2 + 1) % 2],
936        &mut b_r2[(y / 2 + 1) % 2],
937        integral_image,
938        sq_integral_image,
939        SOLVE_IMAGE_STRIDE,
940        y + 2,
941        cdef_w,
942        s_r2,
943        fi.cpu_feature_level,
944      );
945      let ap0: [&[u32]; 2] = [&a_r2[(y / 2) % 2], &a_r2[(y / 2 + 1) % 2]];
946      let bp0: [&[u32]; 2] = [&b_r2[(y / 2) % 2], &b_r2[(y / 2 + 1) % 2]];
947      sgrproj_box_f_r2(
948        &ap0,
949        &bp0,
950        &mut f_r2_0,
951        &mut f_r2_1,
952        y,
953        cdef_w,
954        cdeffed,
955        fi.cpu_feature_level,
956      );
957      [&f_r2_0, &f_r2_1]
958    } else {
959      sgrproj_box_f_r0(&mut f_r2_0, y, cdef_w, cdeffed, fi.cpu_feature_level);
960      [&f_r2_0, &f_r2_0]
962    };
963    for dy in 0..(2.min(cdef_h - y)) {
964      let y = y + dy;
965      if s_r1 > 0 {
966        let integral_image_offset = SOLVE_IMAGE_STRIDE + 1;
967        fn_ab_r1(
968          &mut a_r1[(y + 2) % 3],
969          &mut b_r1[(y + 2) % 3],
970          &integral_image[integral_image_offset..],
971          &sq_integral_image[integral_image_offset..],
972          SOLVE_IMAGE_STRIDE,
973          y + 2,
974          cdef_w,
975          s_r1,
976          fi.cpu_feature_level,
977        );
978        let ap1: [&[u32]; 3] =
979          [&a_r1[y % 3], &a_r1[(y + 1) % 3], &a_r1[(y + 2) % 3]];
980        let bp1: [&[u32]; 3] =
981          [&b_r1[y % 3], &b_r1[(y + 1) % 3], &b_r1[(y + 2) % 3]];
982        sgrproj_box_f_r1(
983          &ap1,
984          &bp1,
985          &mut f_r1,
986          y,
987          cdef_w,
988          cdeffed,
989          fi.cpu_feature_level,
990        );
991      } else {
992        sgrproj_box_f_r0(&mut f_r1, y, cdef_w, cdeffed, fi.cpu_feature_level);
993      }
994
995      #[inline(always)]
996      fn process_line<T: Pixel>(
997        h: &mut [[f64; 2]; 2], c: &mut [f64; 2], cdeffed: &[T], input: &[T],
998        f_r1: &[u32], f_r2_ab: &[u32], cdef_w: usize,
999      ) {
1000        let cdeffed_it = cdeffed[..cdef_w].iter();
1001        let input_it = input[..cdef_w].iter();
1002        let f_r2_ab_it = f_r2_ab[..cdef_w].iter();
1003        let f_r1_it = f_r1[..cdef_w].iter();
1004
1005        #[derive(Debug, Copy, Clone)]
1006        struct Sums {
1007          h: [[i64; 2]; 2],
1008          c: [i64; 2],
1009        }
1010
1011        let sums: Sums = cdeffed_it
1012          .zip(input_it)
1013          .zip(f_r2_ab_it.zip(f_r1_it))
1014          .map(|((&u, &i), (&f2, &f1))| {
1015            let u = i32::cast_from(u) << SGRPROJ_RST_BITS;
1016            let s = (i32::cast_from(i) << SGRPROJ_RST_BITS) - u;
1017            let f2 = f2 as i32 - u;
1018            let f1 = f1 as i32 - u;
1019            (s as i64, f1 as i64, f2 as i64)
1020          })
1021          .fold(Sums { h: [[0; 2]; 2], c: [0; 2] }, |sums, (s, f1, f2)| {
1022            let mut ret: Sums = sums;
1023            ret.h[0][0] += f2 * f2;
1024            ret.h[1][1] += f1 * f1;
1025            ret.h[0][1] += f1 * f2;
1026            ret.c[0] += f2 * s;
1027            ret.c[1] += f1 * s;
1028            ret
1029          });
1030
1031        h[0][0] += sums.h[0][0] as f64;
1032        h[1][1] += sums.h[1][1] as f64;
1033        h[0][1] += sums.h[0][1] as f64;
1034        c[0] += sums.c[0] as f64;
1035        c[1] += sums.c[1] as f64;
1036      }
1037
1038      process_line(
1039        &mut h,
1040        &mut c,
1041        &cdeffed[y],
1042        &input[y],
1043        &f_r1,
1044        f_r2_01[dy],
1045        cdef_w,
1046      );
1047    }
1048  }
1049
1050  let n = cdef_w as f64 * cdef_h as f64;
1052  h[0][0] /= n;
1053  h[0][1] /= n;
1054  h[1][1] /= n;
1055  h[1][0] = h[0][1];
1056  c[0] *= (1 << SGRPROJ_PRJ_BITS) as f64 / n;
1057  c[1] *= (1 << SGRPROJ_PRJ_BITS) as f64 / n;
1058  let (xq0, xq1) = if s_r2 == 0 {
1059    if h[1][1] == 0. {
1062      (0, 0)
1063    } else {
1064      (0, (c[1] / h[1][1]).round() as i32)
1065    }
1066  } else if s_r1 == 0 {
1067    if h[0][0] == 0. {
1070      (0, 0)
1071    } else {
1072      ((c[0] / h[0][0]).round() as i32, 0)
1073    }
1074  } else {
1075    let det = h[0][0].mul_add(h[1][1], -h[0][1] * h[1][0]);
1076    if det == 0. {
1077      (0, 0)
1078    } else {
1079      let div1 = h[1][1].mul_add(c[0], -h[0][1] * c[1]);
1081      let div2 = h[0][0].mul_add(c[1], -h[1][0] * c[0]);
1082      ((div1 / det).round() as i32, (div2 / det).round() as i32)
1083    }
1084  };
1085  {
1086    let xqd0 =
1087      clamp(xq0, SGRPROJ_XQD_MIN[0] as i32, SGRPROJ_XQD_MAX[0] as i32);
1088    let xqd1 = clamp(
1089      (1 << SGRPROJ_PRJ_BITS) - xqd0 - xq1,
1090      SGRPROJ_XQD_MIN[1] as i32,
1091      SGRPROJ_XQD_MAX[1] as i32,
1092    );
1093    (xqd0 as i8, xqd1 as i8)
1094  }
1095}
1096
1097#[profiling::function]
1098fn wiener_stripe_filter<T: Pixel>(
1099  coeffs: [[i8; 3]; 2], fi: &FrameInvariants<T>, crop_w: usize, crop_h: usize,
1100  stripe_w: usize, stripe_h: usize, stripe_x: usize, stripe_y: isize,
1101  cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>,
1102) {
1103  let bit_depth = fi.sequence.bit_depth;
1104  let round_h = if bit_depth == 12 { 5 } else { 3 };
1105  let round_v = if bit_depth == 12 { 9 } else { 11 };
1106  let offset = 1 << (bit_depth + WIENER_BITS - round_h - 1);
1107  let limit = (1 << (bit_depth + 1 + WIENER_BITS - round_h)) - 1;
1108
1109  let mut coeffs_ = [[0; 3]; 2];
1110  for i in 0..2 {
1111    for j in 0..3 {
1112      coeffs_[i][j] = i32::from(coeffs[i][j]);
1113    }
1114  }
1115
1116  let mut work: [i32; SB_SIZE + 7] = [0; SB_SIZE + 7];
1117  let vfilter: [i32; 7] = [
1118    coeffs_[0][0],
1119    coeffs_[0][1],
1120    coeffs_[0][2],
1121    128 - 2 * (coeffs_[0][0] + coeffs_[0][1] + coeffs_[0][2]),
1122    coeffs_[0][2],
1123    coeffs_[0][1],
1124    coeffs_[0][0],
1125  ];
1126  let hfilter: [i32; 7] = [
1127    coeffs_[1][0],
1128    coeffs_[1][1],
1129    coeffs_[1][2],
1130    128 - 2 * (coeffs_[1][0] + coeffs_[1][1] + coeffs_[1][2]),
1131    coeffs_[1][2],
1132    coeffs_[1][1],
1133    coeffs_[1][0],
1134  ];
1135
1136  let start_wi = if stripe_y < 0 { -stripe_y } else { 0 } as usize;
1139  let start_yi = if stripe_y < 0 { 0 } else { stripe_y } as usize;
1140  let end_i = cmp::max(
1141    0,
1142    if stripe_h as isize + stripe_y > crop_h as isize {
1143      crop_h as isize - stripe_y - start_wi as isize
1144    } else {
1145      stripe_h as isize - start_wi as isize
1146    },
1147  ) as usize;
1148
1149  let mut out_slice =
1150    out.mut_slice(PlaneOffset { x: 0, y: start_yi as isize });
1151
1152  for xi in stripe_x..stripe_x + stripe_w {
1153    let n = cmp::min(7, crop_w as isize + 3 - xi as isize);
1154    for yi in stripe_y - 3..stripe_y + stripe_h as isize + 4 {
1155      let mut acc = 0;
1156      let src = if yi < stripe_y {
1157        let ly = cmp::max(clamp(yi, 0, crop_h as isize - 1), stripe_y - 2);
1158        deblocked.row(ly)
1159      } else if yi < stripe_y + stripe_h as isize {
1160        let ly = clamp(yi, 0, crop_h as isize - 1);
1161        cdeffed.row(ly)
1162      } else {
1163        let ly = cmp::min(
1164          clamp(yi, 0, crop_h as isize - 1),
1165          stripe_y + stripe_h as isize + 1,
1166        );
1167        deblocked.row(ly)
1168      };
1169      let start = i32::cast_from(src[0]);
1170      let end = i32::cast_from(src[crop_w - 1]);
1171      for i in 0..3 - xi as isize {
1172        acc += hfilter[i as usize] * start;
1173      }
1174
1175      let off = 3 - (xi as isize);
1176      let s = cmp::max(0, off) as usize;
1177      let s1 = (s as isize - off) as usize;
1178      let n1 = (n - off) as usize;
1179
1180      for (hf, &v) in hfilter[s..n as usize].iter().zip(src[s1..n1].iter()) {
1181        acc += hf * i32::cast_from(v);
1182      }
1183
1184      for i in n..7 {
1185        acc += hfilter[i as usize] * end;
1186      }
1187
1188      acc = (acc + (1 << round_h >> 1)) >> round_h;
1189      work[(yi - stripe_y + 3) as usize] = clamp(acc, -offset, limit - offset);
1190    }
1191
1192    for (wi, dst) in (start_wi..start_wi + end_i)
1193      .zip(out_slice.rows_iter_mut().map(|row| &mut row[xi]).take(end_i))
1194    {
1195      let mut acc = 0;
1196      for (i, src) in (0..7).zip(work[wi..wi + 7].iter_mut()) {
1197        acc += vfilter[i] * *src;
1198      }
1199      *dst = T::cast_from(clamp(
1200        (acc + (1 << round_v >> 1)) >> round_v,
1201        0,
1202        (1 << bit_depth) - 1,
1203      ));
1204    }
1205  }
1206}
1207
1208#[derive(Copy, Clone, Debug, Default)]
1209pub struct RestorationUnit {
1210  pub filter: RestorationFilter,
1211}
1212
1213#[derive(Clone, Debug)]
1214pub struct FrameRestorationUnits {
1215  units: Box<[RestorationUnit]>,
1216  pub cols: usize,
1217  pub rows: usize,
1218}
1219
1220impl FrameRestorationUnits {
1221  pub fn new(cols: usize, rows: usize) -> Self {
1222    Self {
1223      units: vec![RestorationUnit::default(); cols * rows].into_boxed_slice(),
1224      cols,
1225      rows,
1226    }
1227  }
1228}
1229
1230impl Index<usize> for FrameRestorationUnits {
1231  type Output = [RestorationUnit];
1232  #[inline(always)]
1233  fn index(&self, index: usize) -> &Self::Output {
1234    &self.units[index * self.cols..(index + 1) * self.cols]
1235  }
1236}
1237
1238impl IndexMut<usize> for FrameRestorationUnits {
1239  #[inline(always)]
1240  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
1241    &mut self.units[index * self.cols..(index + 1) * self.cols]
1242  }
1243}
1244
1245#[derive(Clone, Debug)]
1246pub struct RestorationPlaneConfig {
1247  pub lrf_type: u8,
1248  pub unit_size: usize,
1249  pub sb_h_shift: usize,
1252  pub sb_v_shift: usize,
1253  pub sb_cols: usize, pub sb_rows: usize, pub stripe_height: usize,
1258  pub cols: usize,
1259  pub rows: usize,
1260}
1261
1262#[derive(Clone, Debug)]
1263pub struct RestorationPlane {
1264  pub cfg: RestorationPlaneConfig,
1265  pub units: FrameRestorationUnits,
1266}
1267
1268#[derive(Clone, Default)]
1269pub struct RestorationPlaneOffset {
1270  pub row: usize,
1271  pub col: usize,
1272}
1273
1274impl RestorationPlane {
1275  pub fn new(
1276    lrf_type: u8, unit_size: usize, sb_h_shift: usize, sb_v_shift: usize,
1277    sb_cols: usize, sb_rows: usize, stripe_decimate: usize, cols: usize,
1278    rows: usize,
1279  ) -> RestorationPlane {
1280    let stripe_height = if stripe_decimate != 0 { 32 } else { 64 };
1281    RestorationPlane {
1282      cfg: RestorationPlaneConfig {
1283        lrf_type,
1284        unit_size,
1285        sb_h_shift,
1286        sb_v_shift,
1287        sb_cols,
1288        sb_rows,
1289        stripe_height,
1290        cols,
1291        rows,
1292      },
1293      units: FrameRestorationUnits::new(cols, rows),
1294    }
1295  }
1296
1297  fn restoration_unit_index_by_stripe(
1301    &self, stripenum: usize, rux: usize,
1302  ) -> (usize, usize) {
1303    (
1304      cmp::min(rux, self.cfg.cols - 1),
1305      cmp::min(
1306        stripenum * self.cfg.stripe_height / self.cfg.unit_size,
1307        self.cfg.rows - 1,
1308      ),
1309    )
1310  }
1311
1312  pub fn restoration_unit_by_stripe(
1313    &self, stripenum: usize, rux: usize,
1314  ) -> &RestorationUnit {
1315    let (x, y) = self.restoration_unit_index_by_stripe(stripenum, rux);
1316    &self.units[y][x]
1317  }
1318}
1319
1320#[derive(Clone, Debug)]
1321pub struct RestorationState {
1322  pub planes: [RestorationPlane; MAX_PLANES],
1323}
1324
1325impl RestorationState {
1326  pub fn new<T: Pixel>(fi: &FrameInvariants<T>, input: &Frame<T>) -> Self {
1327    let PlaneConfig { xdec, ydec, .. } = input.planes[1].cfg;
1328    let stripe_uv_decimate = usize::from(xdec > 0 && ydec > 0);
1330    let y_sb_log2 = if fi.sequence.use_128x128_superblock { 7 } else { 6 };
1331    let uv_sb_h_log2 = y_sb_log2 - xdec;
1332    let uv_sb_v_log2 = y_sb_log2 - ydec;
1333
1334    let (lrf_y_shift, lrf_uv_shift) = if fi.sequence.enable_large_lru
1335      && fi.sequence.enable_restoration
1336    {
1337      assert!(
1338        fi.width > 1 && fi.height > 1,
1339        "Width and height must be higher than 1 for LRF setup"
1340      );
1341
1342      let lrf_base_shift = if fi.base_q_idx > 200 {
1345        0 } else if fi.base_q_idx > 160 {
1347        1
1348      } else {
1349        2 };
1351      let lrf_chroma_shift = if stripe_uv_decimate > 0 {
1352        if lrf_base_shift == 2 {
1354          1 } else {
1356          let lrf_unit_size =
1359            1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_base_shift);
1360          let unshifted_stretch = ((fi.width >> xdec) - 1) % lrf_unit_size
1361            <= lrf_unit_size / 2
1362            || ((fi.height >> ydec) - 1) % lrf_unit_size <= lrf_unit_size / 2;
1363          let shifted_stretch = ((fi.width >> xdec) - 1)
1364            % (lrf_unit_size >> 1)
1365            <= lrf_unit_size / 4
1366            || ((fi.height >> ydec) - 1) % (lrf_unit_size >> 1)
1367              <= lrf_unit_size / 4;
1368          usize::from(unshifted_stretch && !shifted_stretch)
1371        }
1372      } else {
1373        0
1374      };
1375      (lrf_base_shift, lrf_base_shift + lrf_chroma_shift)
1376    } else {
1377      let lrf_y_shift = if fi.sequence.use_128x128_superblock { 1 } else { 2 };
1380      (lrf_y_shift, lrf_y_shift + stripe_uv_decimate)
1381    };
1382
1383    let mut y_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift);
1384    let mut uv_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift);
1385
1386    let tiling = fi.sequence.tiling;
1387    if tiling.cols > 1 || tiling.rows > 1 {
1391      let trailing_h_zeros = tiling.tile_width_sb.trailing_zeros() as usize;
1394      let trailing_v_zeros = tiling.tile_height_sb.trailing_zeros() as usize;
1395      let tile_aligned_y_unit_size =
1396        1 << (y_sb_log2 + trailing_h_zeros.min(trailing_v_zeros));
1397      let tile_aligned_uv_h_unit_size = 1 << (uv_sb_h_log2 + trailing_h_zeros);
1398      let tile_aligned_uv_v_unit_size = 1 << (uv_sb_v_log2 + trailing_v_zeros);
1399      y_unit_size = y_unit_size.min(tile_aligned_y_unit_size);
1400      uv_unit_size = uv_unit_size
1401        .min(tile_aligned_uv_h_unit_size.min(tile_aligned_uv_v_unit_size));
1402
1403      }
1418
1419    if ydec == 0 && y_unit_size != uv_unit_size {
1429      y_unit_size = uv_unit_size.min(y_unit_size);
1430      uv_unit_size = y_unit_size;
1431    }
1432
1433    let y_unit_log2 = y_unit_size.ilog() - 1;
1435    let uv_unit_log2 = uv_unit_size.ilog() - 1;
1436    let y_cols = ((fi.width + (y_unit_size >> 1)) / y_unit_size).max(1);
1437    let y_rows = ((fi.height + (y_unit_size >> 1)) / y_unit_size).max(1);
1438    let uv_cols = ((((fi.width + (1 << xdec >> 1)) >> xdec)
1439      + (uv_unit_size >> 1))
1440      / uv_unit_size)
1441      .max(1);
1442    let uv_rows = ((((fi.height + (1 << ydec >> 1)) >> ydec)
1443      + (uv_unit_size >> 1))
1444      / uv_unit_size)
1445      .max(1);
1446
1447    RestorationState {
1448      planes: [
1449        RestorationPlane::new(
1450          RESTORE_SWITCHABLE,
1451          y_unit_size,
1452          y_unit_log2 - y_sb_log2,
1453          y_unit_log2 - y_sb_log2,
1454          fi.sb_width,
1455          fi.sb_height,
1456          0,
1457          y_cols,
1458          y_rows,
1459        ),
1460        RestorationPlane::new(
1461          RESTORE_SWITCHABLE,
1462          uv_unit_size,
1463          uv_unit_log2 - uv_sb_h_log2,
1464          uv_unit_log2 - uv_sb_v_log2,
1465          fi.sb_width,
1466          fi.sb_height,
1467          stripe_uv_decimate,
1468          uv_cols,
1469          uv_rows,
1470        ),
1471        RestorationPlane::new(
1472          RESTORE_SWITCHABLE,
1473          uv_unit_size,
1474          uv_unit_log2 - uv_sb_h_log2,
1475          uv_unit_log2 - uv_sb_v_log2,
1476          fi.sb_width,
1477          fi.sb_height,
1478          stripe_uv_decimate,
1479          uv_cols,
1480          uv_rows,
1481        ),
1482      ],
1483    }
1484  }
1485
1486  #[profiling::function]
1487  pub fn lrf_filter_frame<T: Pixel>(
1488    &mut self, out: &mut Frame<T>, pre_cdef: &Frame<T>,
1489    fi: &FrameInvariants<T>,
1490  ) {
1491    let cdeffed = out.clone();
1492    let planes =
1493      if fi.sequence.chroma_sampling == Cs400 { 1 } else { MAX_PLANES };
1494
1495    let stripe_n = (fi.height + 7) / 64 + 1;
1502
1503    let mut stripe_filter_buffer =
1505      IntegralImageBuffer::zeroed(STRIPE_IMAGE_SIZE);
1506
1507    for pli in 0..planes {
1508      let rp = &self.planes[pli];
1509      let xdec = out.planes[pli].cfg.xdec;
1510      let ydec = out.planes[pli].cfg.ydec;
1511      let crop_w = (fi.width + (1 << xdec >> 1)) >> xdec;
1512      let crop_h = (fi.height + (1 << ydec >> 1)) >> ydec;
1513
1514      for si in 0..stripe_n {
1515        let (stripe_start_y, stripe_size) = if si == 0 {
1516          (0, (64 - 8) >> ydec)
1517        } else {
1518          let start = (si * 64 - 8) >> ydec;
1519          (
1520            start as isize,
1521            (64 >> ydec).min(crop_h - start),
1523          )
1524        };
1525
1526        for rux in 0..rp.cfg.cols {
1528          let x = rux * rp.cfg.unit_size;
1530          let size =
1531            if rux == rp.cfg.cols - 1 { crop_w - x } else { rp.cfg.unit_size };
1532          let ru = rp.restoration_unit_by_stripe(si, rux);
1533          match ru.filter {
1534            RestorationFilter::Wiener { coeffs } => {
1535              wiener_stripe_filter(
1536                coeffs,
1537                fi,
1538                crop_w,
1539                crop_h,
1540                size,
1541                stripe_size,
1542                x,
1543                stripe_start_y,
1544                &cdeffed.planes[pli],
1545                &pre_cdef.planes[pli],
1546                &mut out.planes[pli],
1547              );
1548            }
1549            RestorationFilter::Sgrproj { set, xqd } => {
1550              if !fi.sequence.enable_cdef {
1551                continue;
1552              }
1553
1554              setup_integral_image(
1555                &mut stripe_filter_buffer,
1556                STRIPE_IMAGE_STRIDE,
1557                crop_w - x,
1558                (crop_h as isize - stripe_start_y) as usize,
1559                size,
1560                stripe_size,
1561                &cdeffed.planes[pli]
1562                  .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1563                &pre_cdef.planes[pli]
1564                  .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1565              );
1566
1567              sgrproj_stripe_filter(
1568                set,
1569                xqd,
1570                fi,
1571                &stripe_filter_buffer,
1572                STRIPE_IMAGE_STRIDE,
1573                &cdeffed.planes[pli]
1574                  .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1575                &mut out.planes[pli].region_mut(Area::Rect {
1576                  x: x as isize,
1577                  y: stripe_start_y,
1578                  width: size,
1579                  height: stripe_size,
1580                }),
1581              );
1582            }
1583            RestorationFilter::None => {
1584              }
1586          }
1587        }
1588      }
1589    }
1590  }
1591}