1use std::cmp;
11use std::iter::FusedIterator;
12use std::ops::{Index, IndexMut};
13
14use crate::api::SGRComplexityLevel;
15use crate::color::ChromaSampling::Cs400;
16use crate::context::{MAX_PLANES, SB_SIZE};
17use crate::encoder::FrameInvariants;
18use crate::frame::{
19 AsRegion, Frame, Plane, PlaneConfig, PlaneOffset, PlaneSlice,
20};
21use crate::tiling::{Area, PlaneRegion, PlaneRegionMut, Rect};
22use crate::util::{clamp, CastFromPrimitive, ILog, Pixel};
23
24cfg_if::cfg_if! {
25 if #[cfg(nasm_x86_64)] {
26 use crate::asm::x86::lrf::*;
27 } else {
28 use self::rust::*;
29 }
30}
31
32pub const RESTORATION_TILESIZE_MAX_LOG2: usize = 8;
33
34pub const RESTORE_NONE: u8 = 0;
35pub const RESTORE_SWITCHABLE: u8 = 1;
36pub const RESTORE_WIENER: u8 = 2;
37pub const RESTORE_SGRPROJ: u8 = 3;
38
39pub const WIENER_TAPS_MIN: [i8; 3] = [-5, -23, -17];
40pub const WIENER_TAPS_MID: [i8; 3] = [3, -7, 15];
41pub const WIENER_TAPS_MAX: [i8; 3] = [10, 8, 46];
42#[allow(unused)]
43pub const WIENER_TAPS_K: [i8; 3] = [1, 2, 3];
44pub const WIENER_BITS: usize = 7;
45
46pub const SGRPROJ_XQD_MIN: [i8; 2] = [-96, -32];
47pub const SGRPROJ_XQD_MID: [i8; 2] = [-32, 31];
48pub const SGRPROJ_XQD_MAX: [i8; 2] = [31, 95];
49pub const SGRPROJ_PRJ_SUBEXP_K: u8 = 4;
50pub const SGRPROJ_PRJ_BITS: u8 = 7;
51pub const SGRPROJ_PARAMS_BITS: u8 = 4;
52pub const SGRPROJ_MTABLE_BITS: u8 = 20;
53pub const SGRPROJ_SGR_BITS: u8 = 8;
54pub const SGRPROJ_RECIP_BITS: u8 = 12;
55pub const SGRPROJ_RST_BITS: u8 = 4;
56pub const SGRPROJ_PARAMS_S: [[u32; 2]; 1 << SGRPROJ_PARAMS_BITS] = [
57 [140, 3236],
58 [112, 2158],
59 [93, 1618],
60 [80, 1438],
61 [70, 1295],
62 [58, 1177],
63 [47, 1079],
64 [37, 996],
65 [30, 925],
66 [25, 863],
67 [0, 2589],
68 [0, 1618],
69 [0, 1177],
70 [0, 925],
71 [56, 0],
72 [22, 0],
73];
74
75const SGRPROJ_ALL_SETS: &[u8] =
78 &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
79const SGRPROJ_REDUCED_SETS: &[u8] = &[1, 3, 5, 7, 9, 11, 13, 15];
87
88pub const fn get_sgr_sets(complexity: SGRComplexityLevel) -> &'static [u8] {
89 match complexity {
90 SGRComplexityLevel::Full => SGRPROJ_ALL_SETS,
91 SGRComplexityLevel::Reduced => SGRPROJ_REDUCED_SETS,
92 }
93}
94
95pub const SOLVE_IMAGE_MAX: usize = 1 << RESTORATION_TILESIZE_MAX_LOG2;
96pub const SOLVE_IMAGE_STRIDE: usize = SOLVE_IMAGE_MAX + 6 + 2;
97pub const SOLVE_IMAGE_HEIGHT: usize = SOLVE_IMAGE_STRIDE;
98pub const SOLVE_IMAGE_SIZE: usize = SOLVE_IMAGE_STRIDE * SOLVE_IMAGE_HEIGHT;
99
100pub const STRIPE_IMAGE_MAX: usize = (1 << RESTORATION_TILESIZE_MAX_LOG2)
101 + (1 << (RESTORATION_TILESIZE_MAX_LOG2 - 1));
102pub const STRIPE_IMAGE_STRIDE: usize = STRIPE_IMAGE_MAX + 6 + 2;
103pub const STRIPE_IMAGE_HEIGHT: usize = 64 + 6 + 2;
104pub const STRIPE_IMAGE_SIZE: usize = STRIPE_IMAGE_STRIDE * STRIPE_IMAGE_HEIGHT;
105
106pub const IMAGE_WIDTH_MAX: usize = [STRIPE_IMAGE_MAX, SOLVE_IMAGE_MAX]
107 [(STRIPE_IMAGE_MAX < SOLVE_IMAGE_MAX) as usize];
108
109#[derive(Debug)]
111pub struct IntegralImageBuffer {
112 pub integral_image: Vec<u32>,
113 pub sq_integral_image: Vec<u32>,
114}
115
116impl IntegralImageBuffer {
117 #[inline]
119 pub fn zeroed(size: usize) -> Self {
120 Self { integral_image: vec![0; size], sq_integral_image: vec![0; size] }
121 }
122}
123
124#[allow(unused)] #[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
126pub enum RestorationFilter {
127 #[default]
128 None,
129 Wiener {
130 coeffs: [[i8; 3]; 2],
131 },
132 Sgrproj {
133 set: u8,
134 xqd: [i8; 2],
135 },
136}
137
138impl RestorationFilter {
139 pub const fn notequal(self, cmp: RestorationFilter) -> bool {
140 match self {
141 RestorationFilter::None => !matches!(cmp, RestorationFilter::None),
142 RestorationFilter::Sgrproj { set, xqd } => {
143 if let RestorationFilter::Sgrproj { set: set2, xqd: xqd2 } = cmp {
144 !(set == set2 && xqd[0] == xqd2[0] && xqd[1] == xqd2[1])
145 } else {
146 true
147 }
148 }
149 RestorationFilter::Wiener { coeffs } => {
150 if let RestorationFilter::Wiener { coeffs: coeffs2 } = cmp {
151 !(coeffs[0][0] == coeffs2[0][0]
152 && coeffs[0][1] == coeffs2[0][1]
153 && coeffs[0][2] == coeffs2[0][2]
154 && coeffs[1][0] == coeffs2[1][0]
155 && coeffs[1][1] == coeffs2[1][1]
156 && coeffs[1][2] == coeffs2[1][2])
157 } else {
158 true
159 }
160 }
161 }
162 }
163}
164
165pub(crate) mod rust {
166 use crate::cpu_features::CpuFeatureLevel;
167 use crate::frame::PlaneSlice;
168 use crate::lrf::{
169 get_integral_square, sgrproj_sum_finish, SGRPROJ_RST_BITS,
170 SGRPROJ_SGR_BITS,
171 };
172 use crate::util::CastFromPrimitive;
173 use crate::Pixel;
174
175 #[inline(always)]
176 pub(crate) fn sgrproj_box_ab_internal<const BD: usize>(
177 r: usize, af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
178 iimg_stride: usize, start_x: usize, y: usize, stripe_w: usize, s: u32,
179 ) {
180 let d: usize = r * 2 + 1;
181 let n: usize = d * d;
182 let one_over_n = if r == 1 { 455 } else { 164 };
183
184 assert!(iimg.len() > (y + d) * iimg_stride + stripe_w + 1 + d);
185 assert!(iimg_sq.len() > (y + d) * iimg_stride + stripe_w + 1 + d);
186 assert!(af.len() > stripe_w + 1);
187 assert!(bf.len() > stripe_w + 1);
188
189 for x in start_x..stripe_w + 2 {
190 unsafe {
192 let sum = get_integral_square(iimg, iimg_stride, x, y, d);
193 let ssq = get_integral_square(iimg_sq, iimg_stride, x, y, d);
194 let (reta, retb) =
195 sgrproj_sum_finish::<BD>(ssq, sum, n as u32, one_over_n, s);
196 *af.get_unchecked_mut(x) = reta;
197 *bf.get_unchecked_mut(x) = retb;
198 }
199 }
200 }
201
202 pub(crate) fn sgrproj_box_ab_r1<const BD: usize>(
204 af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
205 iimg_stride: usize, y: usize, stripe_w: usize, s: u32,
206 _cpu: CpuFeatureLevel,
207 ) {
208 sgrproj_box_ab_internal::<BD>(
209 1,
210 af,
211 bf,
212 iimg,
213 iimg_sq,
214 iimg_stride,
215 0,
216 y,
217 stripe_w,
218 s,
219 );
220 }
221
222 pub(crate) fn sgrproj_box_ab_r2<const BD: usize>(
224 af: &mut [u32], bf: &mut [u32], iimg: &[u32], iimg_sq: &[u32],
225 iimg_stride: usize, y: usize, stripe_w: usize, s: u32,
226 _cpu: CpuFeatureLevel,
227 ) {
228 sgrproj_box_ab_internal::<BD>(
229 2,
230 af,
231 bf,
232 iimg,
233 iimg_sq,
234 iimg_stride,
235 0,
236 y,
237 stripe_w,
238 s,
239 );
240 }
241
242 pub(crate) fn sgrproj_box_f_r0<T: Pixel>(
243 f: &mut [u32], y: usize, w: usize, cdeffed: &PlaneSlice<T>,
244 _cpu: CpuFeatureLevel,
245 ) {
246 sgrproj_box_f_r0_internal(f, 0, y, w, cdeffed);
247 }
248
249 #[inline(always)]
250 pub(crate) fn sgrproj_box_f_r0_internal<T: Pixel>(
251 f: &mut [u32], start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice<T>,
252 ) {
253 let line = cdeffed.row(y);
254 for (fp, &v) in f[start_x..w].iter_mut().zip(line[start_x..w].iter()) {
255 *fp = u32::cast_from(v) << SGRPROJ_RST_BITS;
256 }
257 }
258
259 pub(crate) fn sgrproj_box_f_r1<T: Pixel>(
260 af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], y: usize, w: usize,
261 cdeffed: &PlaneSlice<T>, _cpu: CpuFeatureLevel,
262 ) {
263 sgrproj_box_f_r1_internal(af, bf, f, 0, y, w, cdeffed);
264 }
265
266 #[inline(always)]
267 pub(crate) fn sgrproj_box_f_r1_internal<T: Pixel>(
268 af: &[&[u32]; 3], bf: &[&[u32]; 3], f: &mut [u32], start_x: usize,
269 y: usize, w: usize, cdeffed: &PlaneSlice<T>,
270 ) {
271 let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
272 let line = cdeffed.row(y);
273 for x in start_x..w {
274 let a = 3 * (af[0][x] + af[2][x] + af[0][x + 2] + af[2][x + 2])
275 + 4
276 * (af[1][x]
277 + af[0][x + 1]
278 + af[1][x + 1]
279 + af[2][x + 1]
280 + af[1][x + 2]);
281 let b = 3 * (bf[0][x] + bf[2][x] + bf[0][x + 2] + bf[2][x + 2])
282 + 4
283 * (bf[1][x]
284 + bf[0][x + 1]
285 + bf[1][x + 1]
286 + bf[2][x + 1]
287 + bf[1][x + 2]);
288 let v = a * u32::cast_from(line[x]) + b;
289 f[x] = (v + (1 << shift >> 1)) >> shift;
290 }
291 }
292
293 pub(crate) fn sgrproj_box_f_r2<T: Pixel>(
294 af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32],
295 y: usize, w: usize, cdeffed: &PlaneSlice<T>, _cpu: CpuFeatureLevel,
296 ) {
297 sgrproj_box_f_r2_internal(af, bf, f0, f1, 0, y, w, cdeffed);
298 }
299
300 #[inline(always)]
301 pub(crate) fn sgrproj_box_f_r2_internal<T: Pixel>(
302 af: &[&[u32]; 2], bf: &[&[u32]; 2], f0: &mut [u32], f1: &mut [u32],
303 start_x: usize, y: usize, w: usize, cdeffed: &PlaneSlice<T>,
304 ) {
305 let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
306 let shifto = 4 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
307 let line = cdeffed.row(y);
308 let line1 = cdeffed.row(y + 1);
309
310 let af0 = af[0][start_x..w + 3].windows(3);
311 let af1 = af[1][start_x..w + 3].windows(3);
312 let bf0 = bf[0][start_x..w + 3].windows(3);
313 let bf1 = bf[1][start_x..w + 3].windows(3);
314
315 let af_it = af0.zip(af1);
316 let bf_it = bf0.zip(bf1);
317
318 let in0 = line[start_x..w].iter();
319 let in1 = line1[start_x..w].iter();
320
321 let o0 = f0[start_x..w].iter_mut();
322 let o1 = f1[start_x..w].iter_mut();
323
324 let in_iter = in0.zip(in1);
325 let out_iter = o0.zip(o1);
326
327 let io_iter = out_iter.zip(in_iter);
328
329 for (((o0, o1), (&p0, &p1)), ((af_0, af_1), (bf_0, bf_1))) in
330 io_iter.zip(af_it.zip(bf_it))
331 {
332 let a = 5 * (af_0[0] + af_0[2]) + 6 * af_0[1];
333 let b = 5 * (bf_0[0] + bf_0[2]) + 6 * bf_0[1];
334 let ao = 5 * (af_1[0] + af_1[2]) + 6 * af_1[1];
335 let bo = 5 * (bf_1[0] + bf_1[2]) + 6 * bf_1[1];
336 let v = (a + ao) * u32::cast_from(p0) + b + bo;
337 *o0 = (v + (1 << shift >> 1)) >> shift;
338 let vo = ao * u32::cast_from(p1) + bo;
339 *o1 = (vo + (1 << shifto >> 1)) >> shifto;
340 }
341 }
342}
343
344#[inline(always)]
345fn sgrproj_sum_finish<const BD: usize>(
346 ssq: u32, sum: u32, n: u32, one_over_n: u32, s: u32,
347) -> (u32, u32) {
348 let bdm8 = BD - 8;
349 let scaled_ssq = (ssq + (1 << (2 * bdm8) >> 1)) >> (2 * bdm8);
350 let scaled_sum = (sum + (1 << bdm8 >> 1)) >> bdm8;
351 let p = (scaled_ssq * n).saturating_sub(scaled_sum * scaled_sum);
352 let z = (p * s + (1 << SGRPROJ_MTABLE_BITS >> 1)) >> SGRPROJ_MTABLE_BITS;
353 let a = if z >= 255 {
354 256
355 } else if z == 0 {
356 1
357 } else {
358 ((z << SGRPROJ_SGR_BITS) + z / 2) / (z + 1)
359 };
360 let b = ((1 << SGRPROJ_SGR_BITS) - a) * sum * one_over_n;
361 (a, (b + (1 << SGRPROJ_RECIP_BITS >> 1)) >> SGRPROJ_RECIP_BITS)
362}
363
364#[inline(always)]
367unsafe fn get_integral_square(
368 iimg: &[u32], stride: usize, x: usize, y: usize, size: usize,
369) -> u32 {
370 let top_left = *iimg.get_unchecked(y * stride + x);
372 let top_right = *iimg.get_unchecked(y * stride + x + size);
373 let bottom_left = *iimg.get_unchecked((y + size) * stride + x);
374 let bottom_right = *iimg.get_unchecked((y + size) * stride + x + size);
375 top_left
376 .wrapping_add(bottom_right)
377 .wrapping_sub(bottom_left)
378 .wrapping_sub(top_right)
379}
380
381struct VertPaddedIter<'a, T: Pixel> {
382 deblocked: &'a Plane<T>,
384 cdeffed: &'a Plane<T>,
385 x: isize,
387 y: isize,
389 end: isize,
391 stripe_begin: isize,
394 stripe_end: isize,
397 crop: isize,
400}
401
402impl<'a, T: Pixel> VertPaddedIter<'a, T> {
403 fn new(
404 cdeffed: &PlaneSlice<'a, T>, deblocked: &PlaneSlice<'a, T>,
405 stripe_h: usize, crop: usize,
406 ) -> VertPaddedIter<'a, T> {
407 assert_eq!(cdeffed.x, deblocked.x);
411 assert_eq!(cdeffed.y, deblocked.y);
412
413 let r = 2;
415
416 let rows_above = r + 2;
418 let rows_below = 2;
419
420 let crop = crop as isize + deblocked.y;
423 let stripe_end = stripe_h as isize + deblocked.y;
424
425 let y = deblocked.y - rows_above as isize;
428
429 VertPaddedIter {
430 deblocked: deblocked.plane,
431 cdeffed: cdeffed.plane,
432 x: deblocked.x,
433 y,
434 end: (rows_above + stripe_h + rows_below) as isize + y,
435 stripe_begin: deblocked.y,
436 stripe_end,
437 crop,
438 }
439 }
440}
441
442impl<'a, T: Pixel> Iterator for VertPaddedIter<'a, T> {
443 type Item = &'a [T];
444
445 #[inline(always)]
446 fn next(&mut self) -> Option<Self::Item> {
447 if self.end > self.y {
448 let cropped_y = clamp(self.y, 0, self.crop - 1);
451 let ly = clamp(cropped_y, self.stripe_begin - 2, self.stripe_end + 1);
453
454 let src_plane = if ly >= self.stripe_begin && ly < self.stripe_end {
456 self.cdeffed
457 } else {
458 self.deblocked
459 };
460 let range = src_plane.row_range(self.x, ly);
462 self.y += 1;
463 Some(&src_plane.data[range])
464 } else {
465 None
466 }
467 }
468
469 fn size_hint(&self) -> (usize, Option<usize>) {
470 let remaining = self.end - self.y;
471 debug_assert!(remaining >= 0);
472 let remaining = remaining as usize;
473
474 (remaining, Some(remaining))
475 }
476}
477
478impl<T: Pixel> ExactSizeIterator for VertPaddedIter<'_, T> {}
479impl<T: Pixel> FusedIterator for VertPaddedIter<'_, T> {}
480
481struct HorzPaddedIter<'a, T: Pixel> {
482 slice: &'a [T],
484 index: isize,
488 end: usize,
490}
491
492impl<'a, T: Pixel> HorzPaddedIter<'a, T> {
493 fn new(
494 slice: &'a [T], start_index: isize, width: usize,
495 ) -> HorzPaddedIter<'a, T> {
496 HorzPaddedIter {
497 slice,
498 index: start_index,
499 end: (width as isize + start_index) as usize,
500 }
501 }
502}
503
504impl<'a, T: Pixel> Iterator for HorzPaddedIter<'a, T> {
505 type Item = &'a T;
506
507 #[inline(always)]
508 fn next(&mut self) -> Option<Self::Item> {
509 if self.index < self.end as isize {
510 let x = clamp(self.index, 0, self.slice.len() as isize - 1) as usize;
512 self.index += 1;
513 Some(&self.slice[x])
514 } else {
515 None
516 }
517 }
518
519 #[inline(always)]
520 fn size_hint(&self) -> (usize, Option<usize>) {
521 let size: usize = (self.end as isize - self.index) as usize;
522 (size, Some(size))
523 }
524}
525
526impl<T: Pixel> ExactSizeIterator for HorzPaddedIter<'_, T> {}
527impl<T: Pixel> FusedIterator for HorzPaddedIter<'_, T> {}
528
529#[profiling::function]
530pub fn setup_integral_image<T: Pixel>(
531 integral_image_buffer: &mut IntegralImageBuffer,
532 integral_image_stride: usize, crop_w: usize, crop_h: usize, stripe_w: usize,
533 stripe_h: usize, cdeffed: &PlaneSlice<T>, deblocked: &PlaneSlice<T>,
534) {
535 let integral_image = &mut integral_image_buffer.integral_image;
536 let sq_integral_image = &mut integral_image_buffer.sq_integral_image;
537
538 let left_w = 4; let right_w = 3; assert_eq!(cdeffed.x, deblocked.x);
543
544 let left_uniques = if cdeffed.x == 0 { 0 } else { left_w };
546 let right_uniques = right_w.min(crop_w - stripe_w);
547
548 let row_uniques = left_uniques + stripe_w + right_uniques;
550
551 let start_index_x = if cdeffed.x == 0 { -(left_w as isize) } else { 0 };
553
554 let mut rows_iter = VertPaddedIter::new(
555 &cdeffed.go_left(left_uniques),
557 &deblocked.go_left(left_uniques),
558 stripe_h + (stripe_h & 1),
560 crop_h,
561 )
562 .map(|row: &[T]| {
563 HorzPaddedIter::new(
564 &row[..row_uniques],
566 start_index_x,
567 left_w + stripe_w + right_w,
568 )
569 });
570
571 {
573 let mut sum: u32 = 0;
574 let mut sq_sum: u32 = 0;
575 let row = rows_iter.next().unwrap();
577 for (src, (integral, sq_integral)) in
578 row.zip(integral_image.iter_mut().zip(sq_integral_image.iter_mut()))
579 {
580 let current = u32::cast_from(*src);
581
582 sum = sum.wrapping_add(current);
585 *integral = sum;
586 sq_sum = sq_sum.wrapping_add(current * current);
587 *sq_integral = sq_sum;
588 }
589 }
590 let mut integral_slice = &mut integral_image[..];
592 let mut sq_integral_slice = &mut sq_integral_image[..];
593 for row in rows_iter {
594 let mut sum: u32 = 0;
595 let mut sq_sum: u32 = 0;
596
597 let (integral_row_prev, integral_row) =
601 integral_slice.split_at_mut(integral_image_stride);
602 let (sq_integral_row_prev, sq_integral_row) =
603 sq_integral_slice.split_at_mut(integral_image_stride);
604 for (
605 src,
606 ((integral_above, sq_integral_above), (integral, sq_integral)),
607 ) in row.zip(
608 integral_row_prev
609 .iter()
610 .zip(sq_integral_row_prev.iter())
611 .zip(integral_row.iter_mut().zip(sq_integral_row.iter_mut())),
612 ) {
613 let current = u32::cast_from(*src);
614 sum = sum.wrapping_add(current);
617 *integral = sum.wrapping_add(*integral_above);
618 sq_sum = sq_sum.wrapping_add(current * current);
619 *sq_integral = sq_sum.wrapping_add(*sq_integral_above);
620 }
621
622 integral_slice = integral_row;
625 sq_integral_slice = sq_integral_row;
626 }
627}
628
629#[profiling::function]
630pub fn sgrproj_stripe_filter<T: Pixel, U: Pixel>(
631 set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
632 integral_image_buffer: &IntegralImageBuffer, integral_image_stride: usize,
633 cdeffed: &PlaneSlice<U>, out: &mut PlaneRegionMut<U>,
634) {
635 let &Rect { width: stripe_w, height: stripe_h, .. } = out.rect();
636 let mut a_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
637 [[0; IMAGE_WIDTH_MAX + 2]; 2];
638 let mut b_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
639 [[0; IMAGE_WIDTH_MAX + 2]; 2];
640 let mut f_r2_0: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
641 let mut f_r2_1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
642 let mut a_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
643 [[0; IMAGE_WIDTH_MAX + 2]; 3];
644 let mut b_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
645 [[0; IMAGE_WIDTH_MAX + 2]; 3];
646 let mut f_r1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
647
648 let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
649 let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
650
651 let fn_ab_r1 = match fi.sequence.bit_depth {
652 8 => sgrproj_box_ab_r1::<8>,
653 10 => sgrproj_box_ab_r1::<10>,
654 12 => sgrproj_box_ab_r1::<12>,
655 _ => unimplemented!(),
656 };
657 let fn_ab_r2 = match fi.sequence.bit_depth {
658 8 => sgrproj_box_ab_r2::<8>,
659 10 => sgrproj_box_ab_r2::<10>,
660 12 => sgrproj_box_ab_r2::<12>,
661 _ => unimplemented!(),
662 };
663
664 let integral_image = &integral_image_buffer.integral_image;
670 let sq_integral_image = &integral_image_buffer.sq_integral_image;
671 if s_r2 > 0 {
672 fn_ab_r2(
673 &mut a_r2[0],
674 &mut b_r2[0],
675 integral_image,
676 sq_integral_image,
677 integral_image_stride,
678 0,
679 stripe_w,
680 s_r2,
681 fi.cpu_feature_level,
682 );
683 }
684 if s_r1 > 0 {
685 let integral_image_offset = integral_image_stride + 1;
686 fn_ab_r1(
687 &mut a_r1[0],
688 &mut b_r1[0],
689 &integral_image[integral_image_offset..],
690 &sq_integral_image[integral_image_offset..],
691 integral_image_stride,
692 0,
693 stripe_w,
694 s_r1,
695 fi.cpu_feature_level,
696 );
697 fn_ab_r1(
698 &mut a_r1[1],
699 &mut b_r1[1],
700 &integral_image[integral_image_offset..],
701 &sq_integral_image[integral_image_offset..],
702 integral_image_stride,
703 1,
704 stripe_w,
705 s_r1,
706 fi.cpu_feature_level,
707 );
708 }
709
710 for y in (0..stripe_h).step_by(2) {
714 let f_r2_ab: [&[u32]; 2] = if s_r2 > 0 {
716 fn_ab_r2(
717 &mut a_r2[(y / 2 + 1) % 2],
718 &mut b_r2[(y / 2 + 1) % 2],
719 integral_image,
720 sq_integral_image,
721 integral_image_stride,
722 y + 2,
723 stripe_w,
724 s_r2,
725 fi.cpu_feature_level,
726 );
727 let ap0: [&[u32]; 2] = [&a_r2[(y / 2) % 2], &a_r2[(y / 2 + 1) % 2]];
728 let bp0: [&[u32]; 2] = [&b_r2[(y / 2) % 2], &b_r2[(y / 2 + 1) % 2]];
729 sgrproj_box_f_r2(
730 &ap0,
731 &bp0,
732 &mut f_r2_0,
733 &mut f_r2_1,
734 y,
735 stripe_w,
736 cdeffed,
737 fi.cpu_feature_level,
738 );
739 [&f_r2_0, &f_r2_1]
740 } else {
741 sgrproj_box_f_r0(
742 &mut f_r2_0,
743 y,
744 stripe_w,
745 cdeffed,
746 fi.cpu_feature_level,
747 );
748 [&f_r2_0, &f_r2_0]
750 };
751 for dy in 0..(2.min(stripe_h - y)) {
752 let y = y + dy;
753 if s_r1 > 0 {
754 let integral_image_offset = integral_image_stride + 1;
755 fn_ab_r1(
756 &mut a_r1[(y + 2) % 3],
757 &mut b_r1[(y + 2) % 3],
758 &integral_image[integral_image_offset..],
759 &sq_integral_image[integral_image_offset..],
760 integral_image_stride,
761 y + 2,
762 stripe_w,
763 s_r1,
764 fi.cpu_feature_level,
765 );
766 let ap1: [&[u32]; 3] =
767 [&a_r1[y % 3], &a_r1[(y + 1) % 3], &a_r1[(y + 2) % 3]];
768 let bp1: [&[u32]; 3] =
769 [&b_r1[y % 3], &b_r1[(y + 1) % 3], &b_r1[(y + 2) % 3]];
770 sgrproj_box_f_r1(
771 &ap1,
772 &bp1,
773 &mut f_r1,
774 y,
775 stripe_w,
776 cdeffed,
777 fi.cpu_feature_level,
778 );
779 } else {
780 sgrproj_box_f_r0(
781 &mut f_r1,
782 y,
783 stripe_w,
784 cdeffed,
785 fi.cpu_feature_level,
786 );
787 }
788
789 let w0 = xqd[0] as i32;
791 let w1 = xqd[1] as i32;
792 let w2 = (1 << SGRPROJ_PRJ_BITS) - w0 - w1;
793
794 let line = &cdeffed[y];
795
796 #[inline(always)]
797 fn apply_filter<U: Pixel>(
798 out: &mut [U], line: &[U], f_r1: &[u32], f_r2_ab: &[u32],
799 stripe_w: usize, bit_depth: usize, w0: i32, w1: i32, w2: i32,
800 ) {
801 let line_it = line[..stripe_w].iter();
802 let f_r2_ab_it = f_r2_ab[..stripe_w].iter();
803 let f_r1_it = f_r1[..stripe_w].iter();
804 let out_it = out[..stripe_w].iter_mut();
805
806 for ((o, &u), (&f_r2_ab, &f_r1)) in
807 out_it.zip(line_it).zip(f_r2_ab_it.zip(f_r1_it))
808 {
809 let u = i32::cast_from(u) << SGRPROJ_RST_BITS;
810 let v = w0 * f_r2_ab as i32 + w1 * u + w2 * f_r1 as i32;
811 let s = (v + (1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) >> 1))
812 >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS);
813 *o = U::cast_from(clamp(s, 0, (1 << bit_depth) - 1));
814 }
815 }
816
817 apply_filter(
818 &mut out[y],
819 line,
820 &f_r1,
821 f_r2_ab[dy],
822 stripe_w,
823 fi.sequence.bit_depth,
824 w0,
825 w1,
826 w2,
827 );
828 }
829 }
830}
831
832#[profiling::function]
847pub fn sgrproj_solve<T: Pixel>(
848 set: u8, fi: &FrameInvariants<T>,
849 integral_image_buffer: &IntegralImageBuffer, input: &PlaneRegion<'_, T>,
850 cdeffed: &PlaneSlice<T>, cdef_w: usize, cdef_h: usize,
851) -> (i8, i8) {
852 let mut a_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
853 [[0; IMAGE_WIDTH_MAX + 2]; 2];
854 let mut b_r2: [[u32; IMAGE_WIDTH_MAX + 2]; 2] =
855 [[0; IMAGE_WIDTH_MAX + 2]; 2];
856 let mut f_r2_0: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
857 let mut f_r2_1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
858 let mut a_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
859 [[0; IMAGE_WIDTH_MAX + 2]; 3];
860 let mut b_r1: [[u32; IMAGE_WIDTH_MAX + 2]; 3] =
861 [[0; IMAGE_WIDTH_MAX + 2]; 3];
862 let mut f_r1: [u32; IMAGE_WIDTH_MAX] = [0; IMAGE_WIDTH_MAX];
863
864 let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
865 let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
866
867 let mut h: [[f64; 2]; 2] = [[0., 0.], [0., 0.]];
868 let mut c: [f64; 2] = [0., 0.];
869
870 let fn_ab_r1 = match fi.sequence.bit_depth {
871 8 => sgrproj_box_ab_r1::<8>,
872 10 => sgrproj_box_ab_r1::<10>,
873 12 => sgrproj_box_ab_r1::<12>,
874 _ => unimplemented!(),
875 };
876 let fn_ab_r2 = match fi.sequence.bit_depth {
877 8 => sgrproj_box_ab_r2::<8>,
878 10 => sgrproj_box_ab_r2::<10>,
879 12 => sgrproj_box_ab_r2::<12>,
880 _ => unimplemented!(),
881 };
882
883 let integral_image = &integral_image_buffer.integral_image;
889 let sq_integral_image = &integral_image_buffer.sq_integral_image;
890 if s_r2 > 0 {
891 fn_ab_r2(
892 &mut a_r2[0],
893 &mut b_r2[0],
894 integral_image,
895 sq_integral_image,
896 SOLVE_IMAGE_STRIDE,
897 0,
898 cdef_w,
899 s_r2,
900 fi.cpu_feature_level,
901 );
902 }
903 if s_r1 > 0 {
904 let integral_image_offset = SOLVE_IMAGE_STRIDE + 1;
905 fn_ab_r1(
906 &mut a_r1[0],
907 &mut b_r1[0],
908 &integral_image[integral_image_offset..],
909 &sq_integral_image[integral_image_offset..],
910 SOLVE_IMAGE_STRIDE,
911 0,
912 cdef_w,
913 s_r1,
914 fi.cpu_feature_level,
915 );
916 fn_ab_r1(
917 &mut a_r1[1],
918 &mut b_r1[1],
919 &integral_image[integral_image_offset..],
920 &sq_integral_image[integral_image_offset..],
921 SOLVE_IMAGE_STRIDE,
922 1,
923 cdef_w,
924 s_r1,
925 fi.cpu_feature_level,
926 );
927 }
928
929 for y in (0..cdef_h).step_by(2) {
933 let f_r2_01: [&[u32]; 2] = if s_r2 > 0 {
935 fn_ab_r2(
936 &mut a_r2[(y / 2 + 1) % 2],
937 &mut b_r2[(y / 2 + 1) % 2],
938 integral_image,
939 sq_integral_image,
940 SOLVE_IMAGE_STRIDE,
941 y + 2,
942 cdef_w,
943 s_r2,
944 fi.cpu_feature_level,
945 );
946 let ap0: [&[u32]; 2] = [&a_r2[(y / 2) % 2], &a_r2[(y / 2 + 1) % 2]];
947 let bp0: [&[u32]; 2] = [&b_r2[(y / 2) % 2], &b_r2[(y / 2 + 1) % 2]];
948 sgrproj_box_f_r2(
949 &ap0,
950 &bp0,
951 &mut f_r2_0,
952 &mut f_r2_1,
953 y,
954 cdef_w,
955 cdeffed,
956 fi.cpu_feature_level,
957 );
958 [&f_r2_0, &f_r2_1]
959 } else {
960 sgrproj_box_f_r0(&mut f_r2_0, y, cdef_w, cdeffed, fi.cpu_feature_level);
961 [&f_r2_0, &f_r2_0]
963 };
964 for dy in 0..(2.min(cdef_h - y)) {
965 let y = y + dy;
966 if s_r1 > 0 {
967 let integral_image_offset = SOLVE_IMAGE_STRIDE + 1;
968 fn_ab_r1(
969 &mut a_r1[(y + 2) % 3],
970 &mut b_r1[(y + 2) % 3],
971 &integral_image[integral_image_offset..],
972 &sq_integral_image[integral_image_offset..],
973 SOLVE_IMAGE_STRIDE,
974 y + 2,
975 cdef_w,
976 s_r1,
977 fi.cpu_feature_level,
978 );
979 let ap1: [&[u32]; 3] =
980 [&a_r1[y % 3], &a_r1[(y + 1) % 3], &a_r1[(y + 2) % 3]];
981 let bp1: [&[u32]; 3] =
982 [&b_r1[y % 3], &b_r1[(y + 1) % 3], &b_r1[(y + 2) % 3]];
983 sgrproj_box_f_r1(
984 &ap1,
985 &bp1,
986 &mut f_r1,
987 y,
988 cdef_w,
989 cdeffed,
990 fi.cpu_feature_level,
991 );
992 } else {
993 sgrproj_box_f_r0(&mut f_r1, y, cdef_w, cdeffed, fi.cpu_feature_level);
994 }
995
996 #[inline(always)]
997 fn process_line<T: Pixel>(
998 h: &mut [[f64; 2]; 2], c: &mut [f64; 2], cdeffed: &[T], input: &[T],
999 f_r1: &[u32], f_r2_ab: &[u32], cdef_w: usize,
1000 ) {
1001 let cdeffed_it = cdeffed[..cdef_w].iter();
1002 let input_it = input[..cdef_w].iter();
1003 let f_r2_ab_it = f_r2_ab[..cdef_w].iter();
1004 let f_r1_it = f_r1[..cdef_w].iter();
1005
1006 #[derive(Debug, Copy, Clone)]
1007 struct Sums {
1008 h: [[i64; 2]; 2],
1009 c: [i64; 2],
1010 }
1011
1012 let sums: Sums = cdeffed_it
1013 .zip(input_it)
1014 .zip(f_r2_ab_it.zip(f_r1_it))
1015 .map(|((&u, &i), (&f2, &f1))| {
1016 let u = i32::cast_from(u) << SGRPROJ_RST_BITS;
1017 let s = (i32::cast_from(i) << SGRPROJ_RST_BITS) - u;
1018 let f2 = f2 as i32 - u;
1019 let f1 = f1 as i32 - u;
1020 (s as i64, f1 as i64, f2 as i64)
1021 })
1022 .fold(Sums { h: [[0; 2]; 2], c: [0; 2] }, |sums, (s, f1, f2)| {
1023 let mut ret: Sums = sums;
1024 ret.h[0][0] += f2 * f2;
1025 ret.h[1][1] += f1 * f1;
1026 ret.h[0][1] += f1 * f2;
1027 ret.c[0] += f2 * s;
1028 ret.c[1] += f1 * s;
1029 ret
1030 });
1031
1032 h[0][0] += sums.h[0][0] as f64;
1033 h[1][1] += sums.h[1][1] as f64;
1034 h[0][1] += sums.h[0][1] as f64;
1035 c[0] += sums.c[0] as f64;
1036 c[1] += sums.c[1] as f64;
1037 }
1038
1039 process_line(
1040 &mut h,
1041 &mut c,
1042 &cdeffed[y],
1043 &input[y],
1044 &f_r1,
1045 f_r2_01[dy],
1046 cdef_w,
1047 );
1048 }
1049 }
1050
1051 let n = cdef_w as f64 * cdef_h as f64;
1053 h[0][0] /= n;
1054 h[0][1] /= n;
1055 h[1][1] /= n;
1056 h[1][0] = h[0][1];
1057 c[0] *= (1 << SGRPROJ_PRJ_BITS) as f64 / n;
1058 c[1] *= (1 << SGRPROJ_PRJ_BITS) as f64 / n;
1059 let (xq0, xq1) = if s_r2 == 0 {
1060 if h[1][1] == 0. {
1063 (0, 0)
1064 } else {
1065 (0, (c[1] / h[1][1]).round() as i32)
1066 }
1067 } else if s_r1 == 0 {
1068 if h[0][0] == 0. {
1071 (0, 0)
1072 } else {
1073 ((c[0] / h[0][0]).round() as i32, 0)
1074 }
1075 } else {
1076 let det = h[0][0].mul_add(h[1][1], -h[0][1] * h[1][0]);
1077 if det == 0. {
1078 (0, 0)
1079 } else {
1080 let div1 = h[1][1].mul_add(c[0], -h[0][1] * c[1]);
1082 let div2 = h[0][0].mul_add(c[1], -h[1][0] * c[0]);
1083 ((div1 / det).round() as i32, (div2 / det).round() as i32)
1084 }
1085 };
1086 {
1087 let xqd0 =
1088 clamp(xq0, SGRPROJ_XQD_MIN[0] as i32, SGRPROJ_XQD_MAX[0] as i32);
1089 let xqd1 = clamp(
1090 (1 << SGRPROJ_PRJ_BITS) - xqd0 - xq1,
1091 SGRPROJ_XQD_MIN[1] as i32,
1092 SGRPROJ_XQD_MAX[1] as i32,
1093 );
1094 (xqd0 as i8, xqd1 as i8)
1095 }
1096}
1097
1098#[profiling::function]
1099fn wiener_stripe_filter<T: Pixel>(
1100 coeffs: [[i8; 3]; 2], fi: &FrameInvariants<T>, crop_w: usize, crop_h: usize,
1101 stripe_w: usize, stripe_h: usize, stripe_x: usize, stripe_y: isize,
1102 cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>,
1103) {
1104 let bit_depth = fi.sequence.bit_depth;
1105 let round_h = if bit_depth == 12 { 5 } else { 3 };
1106 let round_v = if bit_depth == 12 { 9 } else { 11 };
1107 let offset = 1 << (bit_depth + WIENER_BITS - round_h - 1);
1108 let limit = (1 << (bit_depth + 1 + WIENER_BITS - round_h)) - 1;
1109
1110 let mut coeffs_ = [[0; 3]; 2];
1111 for i in 0..2 {
1112 for j in 0..3 {
1113 coeffs_[i][j] = i32::from(coeffs[i][j]);
1114 }
1115 }
1116
1117 let mut work: [i32; SB_SIZE + 7] = [0; SB_SIZE + 7];
1118 let vfilter: [i32; 7] = [
1119 coeffs_[0][0],
1120 coeffs_[0][1],
1121 coeffs_[0][2],
1122 128 - 2 * (coeffs_[0][0] + coeffs_[0][1] + coeffs_[0][2]),
1123 coeffs_[0][2],
1124 coeffs_[0][1],
1125 coeffs_[0][0],
1126 ];
1127 let hfilter: [i32; 7] = [
1128 coeffs_[1][0],
1129 coeffs_[1][1],
1130 coeffs_[1][2],
1131 128 - 2 * (coeffs_[1][0] + coeffs_[1][1] + coeffs_[1][2]),
1132 coeffs_[1][2],
1133 coeffs_[1][1],
1134 coeffs_[1][0],
1135 ];
1136
1137 let start_wi = if stripe_y < 0 { -stripe_y } else { 0 } as usize;
1140 let start_yi = if stripe_y < 0 { 0 } else { stripe_y } as usize;
1141 let end_i = cmp::max(
1142 0,
1143 if stripe_h as isize + stripe_y > crop_h as isize {
1144 crop_h as isize - stripe_y - start_wi as isize
1145 } else {
1146 stripe_h as isize - start_wi as isize
1147 },
1148 ) as usize;
1149
1150 let mut out_slice =
1151 out.mut_slice(PlaneOffset { x: 0, y: start_yi as isize });
1152
1153 for xi in stripe_x..stripe_x + stripe_w {
1154 let n = cmp::min(7, crop_w as isize + 3 - xi as isize);
1155 for yi in stripe_y - 3..stripe_y + stripe_h as isize + 4 {
1156 let mut acc = 0;
1157 let src = if yi < stripe_y {
1158 let ly = cmp::max(clamp(yi, 0, crop_h as isize - 1), stripe_y - 2);
1159 deblocked.row(ly)
1160 } else if yi < stripe_y + stripe_h as isize {
1161 let ly = clamp(yi, 0, crop_h as isize - 1);
1162 cdeffed.row(ly)
1163 } else {
1164 let ly = cmp::min(
1165 clamp(yi, 0, crop_h as isize - 1),
1166 stripe_y + stripe_h as isize + 1,
1167 );
1168 deblocked.row(ly)
1169 };
1170 let start = i32::cast_from(src[0]);
1171 let end = i32::cast_from(src[crop_w - 1]);
1172 for i in 0..3 - xi as isize {
1173 acc += hfilter[i as usize] * start;
1174 }
1175
1176 let off = 3 - (xi as isize);
1177 let s = cmp::max(0, off) as usize;
1178 let s1 = (s as isize - off) as usize;
1179 let n1 = (n - off) as usize;
1180
1181 for (hf, &v) in hfilter[s..n as usize].iter().zip(src[s1..n1].iter()) {
1182 acc += hf * i32::cast_from(v);
1183 }
1184
1185 for i in n..7 {
1186 acc += hfilter[i as usize] * end;
1187 }
1188
1189 acc = (acc + (1 << round_h >> 1)) >> round_h;
1190 work[(yi - stripe_y + 3) as usize] = clamp(acc, -offset, limit - offset);
1191 }
1192
1193 for (wi, dst) in (start_wi..start_wi + end_i)
1194 .zip(out_slice.rows_iter_mut().map(|row| &mut row[xi]).take(end_i))
1195 {
1196 let mut acc = 0;
1197 for (i, src) in (0..7).zip(work[wi..wi + 7].iter_mut()) {
1198 acc += vfilter[i] * *src;
1199 }
1200 *dst = T::cast_from(clamp(
1201 (acc + (1 << round_v >> 1)) >> round_v,
1202 0,
1203 (1 << bit_depth) - 1,
1204 ));
1205 }
1206 }
1207}
1208
1209#[derive(Copy, Clone, Debug, Default)]
1210pub struct RestorationUnit {
1211 pub filter: RestorationFilter,
1212}
1213
1214#[derive(Clone, Debug)]
1215pub struct FrameRestorationUnits {
1216 units: Box<[RestorationUnit]>,
1217 pub cols: usize,
1218 pub rows: usize,
1219}
1220
1221impl FrameRestorationUnits {
1222 pub fn new(cols: usize, rows: usize) -> Self {
1223 Self {
1224 units: vec![RestorationUnit::default(); cols * rows].into_boxed_slice(),
1225 cols,
1226 rows,
1227 }
1228 }
1229}
1230
1231impl Index<usize> for FrameRestorationUnits {
1232 type Output = [RestorationUnit];
1233 #[inline(always)]
1234 fn index(&self, index: usize) -> &Self::Output {
1235 &self.units[index * self.cols..(index + 1) * self.cols]
1236 }
1237}
1238
1239impl IndexMut<usize> for FrameRestorationUnits {
1240 #[inline(always)]
1241 fn index_mut(&mut self, index: usize) -> &mut Self::Output {
1242 &mut self.units[index * self.cols..(index + 1) * self.cols]
1243 }
1244}
1245
1246#[derive(Clone, Debug)]
1247pub struct RestorationPlaneConfig {
1248 pub lrf_type: u8,
1249 pub unit_size: usize,
1250 pub sb_h_shift: usize,
1253 pub sb_v_shift: usize,
1254 pub sb_cols: usize, pub sb_rows: usize, pub stripe_height: usize,
1259 pub cols: usize,
1260 pub rows: usize,
1261}
1262
1263#[derive(Clone, Debug)]
1264pub struct RestorationPlane {
1265 pub cfg: RestorationPlaneConfig,
1266 pub units: FrameRestorationUnits,
1267}
1268
1269impl RestorationPlane {
1270 pub fn new(
1271 lrf_type: u8, unit_size: usize, sb_h_shift: usize, sb_v_shift: usize,
1272 sb_cols: usize, sb_rows: usize, stripe_decimate: usize, cols: usize,
1273 rows: usize,
1274 ) -> RestorationPlane {
1275 let stripe_height = if stripe_decimate != 0 { 32 } else { 64 };
1276 RestorationPlane {
1277 cfg: RestorationPlaneConfig {
1278 lrf_type,
1279 unit_size,
1280 sb_h_shift,
1281 sb_v_shift,
1282 sb_cols,
1283 sb_rows,
1284 stripe_height,
1285 cols,
1286 rows,
1287 },
1288 units: FrameRestorationUnits::new(cols, rows),
1289 }
1290 }
1291
1292 fn restoration_unit_index_by_stripe(
1296 &self, stripenum: usize, rux: usize,
1297 ) -> (usize, usize) {
1298 (
1299 cmp::min(rux, self.cfg.cols - 1),
1300 cmp::min(
1301 stripenum * self.cfg.stripe_height / self.cfg.unit_size,
1302 self.cfg.rows - 1,
1303 ),
1304 )
1305 }
1306
1307 pub fn restoration_unit_by_stripe(
1308 &self, stripenum: usize, rux: usize,
1309 ) -> &RestorationUnit {
1310 let (x, y) = self.restoration_unit_index_by_stripe(stripenum, rux);
1311 &self.units[y][x]
1312 }
1313}
1314
1315#[derive(Clone, Debug)]
1316pub struct RestorationState {
1317 pub planes: [RestorationPlane; MAX_PLANES],
1318}
1319
1320impl RestorationState {
1321 pub fn new<T: Pixel>(fi: &FrameInvariants<T>, input: &Frame<T>) -> Self {
1322 let PlaneConfig { xdec, ydec, .. } = input.planes[1].cfg;
1323 let stripe_uv_decimate = usize::from(xdec > 0 && ydec > 0);
1325 let y_sb_log2 = if fi.sequence.use_128x128_superblock { 7 } else { 6 };
1326 let uv_sb_h_log2 = y_sb_log2 - xdec;
1327 let uv_sb_v_log2 = y_sb_log2 - ydec;
1328
1329 let (lrf_y_shift, lrf_uv_shift) = if fi.sequence.enable_large_lru
1330 && fi.sequence.enable_restoration
1331 {
1332 assert!(
1333 fi.width > 1 && fi.height > 1,
1334 "Width and height must be higher than 1 for LRF setup"
1335 );
1336
1337 let lrf_base_shift = if fi.base_q_idx > 200 {
1340 0 } else if fi.base_q_idx > 160 {
1342 1
1343 } else {
1344 2 };
1346 let lrf_chroma_shift = if stripe_uv_decimate > 0 {
1347 if lrf_base_shift == 2 {
1349 1 } else {
1351 let lrf_unit_size =
1354 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_base_shift);
1355 let unshifted_stretch = ((fi.width >> xdec) - 1) % lrf_unit_size
1356 <= lrf_unit_size / 2
1357 || ((fi.height >> ydec) - 1) % lrf_unit_size <= lrf_unit_size / 2;
1358 let shifted_stretch = ((fi.width >> xdec) - 1)
1359 % (lrf_unit_size >> 1)
1360 <= lrf_unit_size / 4
1361 || ((fi.height >> ydec) - 1) % (lrf_unit_size >> 1)
1362 <= lrf_unit_size / 4;
1363 usize::from(unshifted_stretch && !shifted_stretch)
1366 }
1367 } else {
1368 0
1369 };
1370 (lrf_base_shift, lrf_base_shift + lrf_chroma_shift)
1371 } else {
1372 let lrf_y_shift = if fi.sequence.use_128x128_superblock { 1 } else { 2 };
1375 (lrf_y_shift, lrf_y_shift + stripe_uv_decimate)
1376 };
1377
1378 let mut y_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift);
1379 let mut uv_unit_size = 1 << (RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift);
1380
1381 let tiling = fi.sequence.tiling;
1382 if tiling.cols > 1 || tiling.rows > 1 {
1386 let trailing_h_zeros = tiling.tile_width_sb.trailing_zeros() as usize;
1389 let trailing_v_zeros = tiling.tile_height_sb.trailing_zeros() as usize;
1390 let tile_aligned_y_unit_size =
1391 1 << (y_sb_log2 + trailing_h_zeros.min(trailing_v_zeros));
1392 let tile_aligned_uv_h_unit_size = 1 << (uv_sb_h_log2 + trailing_h_zeros);
1393 let tile_aligned_uv_v_unit_size = 1 << (uv_sb_v_log2 + trailing_v_zeros);
1394 y_unit_size = y_unit_size.min(tile_aligned_y_unit_size);
1395 uv_unit_size = uv_unit_size
1396 .min(tile_aligned_uv_h_unit_size.min(tile_aligned_uv_v_unit_size));
1397
1398 }
1413
1414 if ydec == 0 && y_unit_size != uv_unit_size {
1424 y_unit_size = uv_unit_size.min(y_unit_size);
1425 uv_unit_size = y_unit_size;
1426 }
1427
1428 let y_unit_log2 = y_unit_size.ilog() - 1;
1430 let uv_unit_log2 = uv_unit_size.ilog() - 1;
1431 let y_cols = ((fi.width + (y_unit_size >> 1)) / y_unit_size).max(1);
1432 let y_rows = ((fi.height + (y_unit_size >> 1)) / y_unit_size).max(1);
1433 let uv_cols = ((((fi.width + (1 << xdec >> 1)) >> xdec)
1434 + (uv_unit_size >> 1))
1435 / uv_unit_size)
1436 .max(1);
1437 let uv_rows = ((((fi.height + (1 << ydec >> 1)) >> ydec)
1438 + (uv_unit_size >> 1))
1439 / uv_unit_size)
1440 .max(1);
1441
1442 RestorationState {
1443 planes: [
1444 RestorationPlane::new(
1445 RESTORE_SWITCHABLE,
1446 y_unit_size,
1447 y_unit_log2 - y_sb_log2,
1448 y_unit_log2 - y_sb_log2,
1449 fi.sb_width,
1450 fi.sb_height,
1451 0,
1452 y_cols,
1453 y_rows,
1454 ),
1455 RestorationPlane::new(
1456 RESTORE_SWITCHABLE,
1457 uv_unit_size,
1458 uv_unit_log2 - uv_sb_h_log2,
1459 uv_unit_log2 - uv_sb_v_log2,
1460 fi.sb_width,
1461 fi.sb_height,
1462 stripe_uv_decimate,
1463 uv_cols,
1464 uv_rows,
1465 ),
1466 RestorationPlane::new(
1467 RESTORE_SWITCHABLE,
1468 uv_unit_size,
1469 uv_unit_log2 - uv_sb_h_log2,
1470 uv_unit_log2 - uv_sb_v_log2,
1471 fi.sb_width,
1472 fi.sb_height,
1473 stripe_uv_decimate,
1474 uv_cols,
1475 uv_rows,
1476 ),
1477 ],
1478 }
1479 }
1480
1481 #[profiling::function]
1482 pub fn lrf_filter_frame<T: Pixel>(
1483 &mut self, out: &mut Frame<T>, pre_cdef: &Frame<T>,
1484 fi: &FrameInvariants<T>,
1485 ) {
1486 let cdeffed = out.clone();
1487 let planes =
1488 if fi.sequence.chroma_sampling == Cs400 { 1 } else { MAX_PLANES };
1489
1490 let stripe_n = (fi.height + 7) / 64 + 1;
1497
1498 let mut stripe_filter_buffer =
1500 IntegralImageBuffer::zeroed(STRIPE_IMAGE_SIZE);
1501
1502 for pli in 0..planes {
1503 let rp = &self.planes[pli];
1504 let xdec = out.planes[pli].cfg.xdec;
1505 let ydec = out.planes[pli].cfg.ydec;
1506 let crop_w = (fi.width + (1 << xdec >> 1)) >> xdec;
1507 let crop_h = (fi.height + (1 << ydec >> 1)) >> ydec;
1508
1509 for si in 0..stripe_n {
1510 let (stripe_start_y, stripe_size) = if si == 0 {
1511 (0, (64 - 8) >> ydec)
1512 } else {
1513 let start = (si * 64 - 8) >> ydec;
1514 (
1515 start as isize,
1516 (64 >> ydec).min(crop_h - start),
1518 )
1519 };
1520
1521 for rux in 0..rp.cfg.cols {
1523 let x = rux * rp.cfg.unit_size;
1525 let size =
1526 if rux == rp.cfg.cols - 1 { crop_w - x } else { rp.cfg.unit_size };
1527 let ru = rp.restoration_unit_by_stripe(si, rux);
1528 match ru.filter {
1529 RestorationFilter::Wiener { coeffs } => {
1530 wiener_stripe_filter(
1531 coeffs,
1532 fi,
1533 crop_w,
1534 crop_h,
1535 size,
1536 stripe_size,
1537 x,
1538 stripe_start_y,
1539 &cdeffed.planes[pli],
1540 &pre_cdef.planes[pli],
1541 &mut out.planes[pli],
1542 );
1543 }
1544 RestorationFilter::Sgrproj { set, xqd } => {
1545 if !fi.sequence.enable_cdef {
1546 continue;
1547 }
1548
1549 setup_integral_image(
1550 &mut stripe_filter_buffer,
1551 STRIPE_IMAGE_STRIDE,
1552 crop_w - x,
1553 (crop_h as isize - stripe_start_y) as usize,
1554 size,
1555 stripe_size,
1556 &cdeffed.planes[pli]
1557 .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1558 &pre_cdef.planes[pli]
1559 .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1560 );
1561
1562 sgrproj_stripe_filter(
1563 set,
1564 xqd,
1565 fi,
1566 &stripe_filter_buffer,
1567 STRIPE_IMAGE_STRIDE,
1568 &cdeffed.planes[pli]
1569 .slice(PlaneOffset { x: x as isize, y: stripe_start_y }),
1570 &mut out.planes[pli].region_mut(Area::Rect {
1571 x: x as isize,
1572 y: stripe_start_y,
1573 width: size,
1574 height: stripe_size,
1575 }),
1576 );
1577 }
1578 RestorationFilter::None => {
1579 }
1581 }
1582 }
1583 }
1584 }
1585 }
1586}