1#![allow(non_upper_case_globals)]
11
12mod tables;
13
14cfg_if::cfg_if! {
15 if #[cfg(nasm_x86_64)] {
16 pub use crate::asm::x86::quantize::*;
17 } else {
18 pub use self::rust::*;
19 }
20}
21
22pub use tables::*;
23
24use crate::scan_order::av1_scan_orders;
25use crate::transform::{TxSize, TxType};
26use crate::util::*;
27use std::mem;
28use std::num::{NonZeroU16, NonZeroU32, NonZeroU64};
29
30pub fn get_log_tx_scale(tx_size: TxSize) -> usize {
31 let num_pixels = tx_size.area();
32
33 Into::<usize>::into(num_pixels > 256)
34 + Into::<usize>::into(num_pixels > 1024)
35}
36
37pub fn dc_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
38 let dc_q: [&[NonZeroU16; 256]; 3] =
39 [&dc_qlookup_Q3, &dc_qlookup_10_Q3, &dc_qlookup_12_Q3];
40 let bd = ((bit_depth ^ 8) >> 1).min(2);
41 dc_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
42}
43
44pub fn ac_q(qindex: u8, delta_q: i8, bit_depth: usize) -> NonZeroU16 {
45 let ac_q: [&[NonZeroU16; 256]; 3] =
46 [&ac_qlookup_Q3, &ac_qlookup_10_Q3, &ac_qlookup_12_Q3];
47 let bd = ((bit_depth ^ 8) >> 1).min(2);
48 ac_q[bd][((qindex as isize + delta_q as isize).max(0) as usize).min(255)]
49}
50
51fn select_qi(quantizer: i64, qlookup: &[NonZeroU16; QINDEX_RANGE]) -> u8 {
53 if quantizer < qlookup[MINQ].get() as i64 {
54 MINQ as u8
55 } else if quantizer >= qlookup[MAXQ].get() as i64 {
56 MAXQ as u8
57 } else {
58 match qlookup
59 .binary_search(&NonZeroU16::new(quantizer as u16).expect("Not zero"))
60 {
61 Ok(qi) => qi as u8,
62 Err(qi) => {
63 debug_assert!(qi > MINQ);
64 debug_assert!(qi <= MAXQ);
65 let qthresh =
67 (qlookup[qi - 1].get() as i32) * (qlookup[qi].get() as i32);
68 let q2_i32 = (quantizer as i32) * (quantizer as i32);
69 if q2_i32 < qthresh {
70 (qi - 1) as u8
71 } else {
72 qi as u8
73 }
74 }
75 }
76 }
77}
78
79pub fn select_dc_qi(quantizer: i64, bit_depth: usize) -> u8 {
80 let qlookup = match bit_depth {
81 8 => &dc_qlookup_Q3,
82 10 => &dc_qlookup_10_Q3,
83 12 => &dc_qlookup_12_Q3,
84 _ => unimplemented!(),
85 };
86 select_qi(quantizer, qlookup)
87}
88
89pub fn select_ac_qi(quantizer: i64, bit_depth: usize) -> u8 {
90 let qlookup = match bit_depth {
91 8 => &ac_qlookup_Q3,
92 10 => &ac_qlookup_10_Q3,
93 12 => &ac_qlookup_12_Q3,
94 _ => unimplemented!(),
95 };
96 select_qi(quantizer, qlookup)
97}
98
/// Precomputed quantization parameters for one transform-unit configuration.
///
/// `update` fills these fields from a qindex / transform-size pair;
/// `quantize` then uses them to quantize coefficients without per-sample
/// division (the `*_mul_add` triples encode reciprocal multipliers produced
/// by `divu_gen`).
#[derive(Debug, Clone, Copy)]
pub struct QuantizationContext {
  // log2 of the extra coefficient scaling used for large transforms.
  log_tx_scale: usize,
  // Quantizer step applied to the DC (first) coefficient.
  dc_quant: NonZeroU16,
  // Rounding bias added to |coeff| before dividing by `dc_quant`.
  dc_offset: u32,
  // (multiplier, addend, shift) from `divu_gen` for dividing by `dc_quant`.
  dc_mul_add: (u32, u32, u32),

  // Quantizer step applied to AC coefficients.
  ac_quant: NonZeroU16,
  // Bias used when estimating the end-of-block (eob) position.
  ac_offset_eob: u32,
  // Rounding biases for AC coefficients; `quantize` picks between them
  // based on its running `level_mode` state.
  ac_offset0: u32,
  ac_offset1: u32,
  // (multiplier, addend, shift) from `divu_gen` for dividing by `ac_quant`.
  ac_mul_add: (u32, u32, u32),
}
112
113impl Default for QuantizationContext {
114 fn default() -> Self {
115 QuantizationContext {
116 dc_quant: NonZeroU16::new(1).expect("Not zero"),
117 ac_quant: NonZeroU16::new(1).expect("Not zero"),
118 log_tx_scale: Default::default(),
119 dc_offset: Default::default(),
120 dc_mul_add: Default::default(),
121 ac_offset_eob: Default::default(),
122 ac_offset0: Default::default(),
123 ac_offset1: Default::default(),
124 ac_mul_add: Default::default(),
125 }
126 }
127}
128
/// Builds a `(multiplier, addend, shift)` triple such that
/// `divu_pair(x, divu_gen(d)) == x / d` for every `x: u32`.
///
/// Power-of-two divisors use an all-ones multiplier with a plain shift;
/// other divisors use a fixed-point reciprocal computed in 64 bits, choosing
/// between the round-up (`t + 1`, addend 0) and round-down (`t`, addend `t`)
/// variants depending on the residual `r`.
fn divu_gen(d: NonZeroU32) -> (u32, u32, u32) {
  let nbits = u32::BITS as u64;
  // m = floor(log2(d))
  let m = (nbits - 1) - d.leading_zeros() as u64;

  if d.is_power_of_two() {
    return (u32::MAX, u32::MAX, m as u32);
  }

  let wide = NonZeroU64::from(d);
  // Truncated fixed-point reciprocal: floor(2^(m + 32) / d).
  let t = (1u64 << (m + nbits)) / wide;

  let d = wide.get();
  let r = (t * d + d) & ((1 << nbits) - 1);
  if r <= 1u64 << m {
    (t as u32 + 1, 0u32, m as u32)
  } else {
    (t as u32, t as u32, m as u32)
  }
}
147
/// Divides `x` by the divisor encoded in `d` (a triple from `divu_gen`)
/// using one widening multiply and two shifts instead of a division.
#[inline]
const fn divu_pair(x: u32, d: (u32, u32, u32)) -> u32 {
  let (a, b, shift) = d;
  // 32x32 -> 64-bit multiply-accumulate, then drop the fractional 32 bits
  // and apply the divisor-specific shift.
  let wide = a as u64 * x as u64 + b as u64;
  ((wide >> 32) >> shift) as u32
}
158
/// Gives `value` the sign of `signed`; zero and positive `signed` both
/// yield the positive magnitude.
#[inline]
const fn copysign(value: u32, signed: i32) -> i32 {
  let magnitude = value as i32;
  if signed >= 0 {
    magnitude
  } else {
    -magnitude
  }
}
167
#[cfg(test)]
mod test {
  use super::*;
  use crate::transform::TxSize::*;

  // `divu_pair(x, divu_gen(d))` must agree with plain integer division
  // for every divisor/dividend pair in the tested range.
  #[test]
  fn test_divu_pair() {
    for d in 1..1024 {
      for x in 0..1000 {
        let ab = divu_gen(NonZeroU32::new(d).unwrap());
        assert_eq!(x / d, divu_pair(x, ab));
      }
    }
  }
  // Not an assertion test: dumps the reciprocal triples for the 8-bit DC
  // lookup table so they can be inspected or pasted elsewhere.
  #[test]
  fn gen_divu_table() {
    let b: Vec<(u32, u32, u32)> =
      dc_qlookup_Q3.iter().map(|&v| divu_gen(v.into())).collect();

    println!("{:?}", b);
  }
  // Spot-checks `get_log_tx_scale` against the expected scale for every
  // transform size (0 for <= 256 px, 1 for <= 1024 px, 2 above).
  #[test]
  fn test_tx_log_scale() {
    let tx_sizes = [
      (TX_4X4, 0),
      (TX_8X8, 0),
      (TX_16X16, 0),
      (TX_32X32, 1),
      (TX_64X64, 2),
      (TX_4X8, 0),
      (TX_8X4, 0),
      (TX_8X16, 0),
      (TX_16X8, 0),
      (TX_16X32, 1),
      (TX_32X16, 1),
      (TX_32X64, 2),
      (TX_64X32, 2),
      (TX_4X16, 0),
      (TX_16X4, 0),
      (TX_8X32, 0),
      (TX_32X8, 0),
      (TX_16X64, 1),
      (TX_64X16, 1),
    ];
    for &tx_size in tx_sizes.iter() {
      assert!(tx_size.1 == get_log_tx_scale(tx_size.0));
    }
  }
}
217
impl QuantizationContext {
  /// Recomputes every cached parameter for the given base `qindex`,
  /// transform size, intra/inter mode, bit depth, and per-plane delta-q
  /// values.  Must be called before `quantize`.
  pub fn update(
    &mut self, qindex: u8, tx_size: TxSize, is_intra: bool, bit_depth: usize,
    dc_delta_q: i8, ac_delta_q: i8,
  ) {
    self.log_tx_scale = get_log_tx_scale(tx_size);

    self.dc_quant = dc_q(qindex, dc_delta_q, bit_depth);
    self.dc_mul_add = divu_gen(self.dc_quant.into());

    self.ac_quant = ac_q(qindex, ac_delta_q, bit_depth);
    self.ac_mul_add = divu_gen(self.ac_quant.into());

    // Rounding biases expressed as n/256 of the quantizer step.  Intra
    // blocks get a slightly larger bias than inter blocks; the eob
    // estimate uses a much smaller one.
    self.dc_offset =
      self.dc_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset0 =
      self.ac_quant.get() as u32 * (if is_intra { 98 } else { 97 }) / 256;
    self.ac_offset1 =
      self.ac_quant.get() as u32 * (if is_intra { 109 } else { 108 }) / 256;
    self.ac_offset_eob =
      self.ac_quant.get() as u32 * (if is_intra { 88 } else { 44 }) / 256;
  }

  /// Quantizes `coeffs` into `qcoeffs` for the given transform size/type
  /// and returns the end-of-block position (number of coefficients, in
  /// scan order, up to and including the last non-zero one; 0 if all are
  /// zero).
  #[inline]
  pub fn quantize<T: Coefficient>(
    &self, coeffs: &[T], qcoeffs: &mut [T], tx_size: TxSize, tx_type: TxType,
  ) -> u16 {
    let scan = av1_scan_orders[tx_size as usize][tx_type as usize].scan;
    let iscan = av1_scan_orders[tx_size as usize][tx_type as usize].iscan;

    // DC coefficient: scale, bias the magnitude, divide by dc_quant via
    // the precomputed reciprocal, then restore the sign.
    qcoeffs[0] = {
      let coeff: i32 = i32::cast_from(coeffs[0]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();
      T::cast_from(copysign(
        divu_pair(abs_coeff + self.dc_offset, self.dc_mul_add),
        coeff,
      ))
    };

    // Any coefficient below this threshold quantizes to zero, so the eob
    // can be estimated up front without quantizing everything.
    let deadzone = T::cast_from(
      (self.ac_quant.get() as usize - self.ac_offset_eob as usize)
        .align_power_of_two_and_shift(self.log_tx_scale),
    );
    let eob = {
      // Highest scan position (via the inverse scan) whose coefficient
      // clears the deadzone.
      let eob_minus_one = iscan
        .iter()
        .zip(coeffs)
        .map(|(&i, &c)| if c.abs() >= deadzone { i } else { 0 })
        .max()
        .unwrap_or(0);
      if eob_minus_one > 0 {
        eob_minus_one + 1
      } else {
        // Only the DC coefficient can be non-zero here.
        u16::from(qcoeffs[0] != T::cast_from(0))
      }
    };

    // AC coefficients in scan order up to the estimated eob.  `level_mode`
    // tracks whether the previous quantized level allows the larger
    // rounding bias (ac_offset1) or forces the smaller one (ac_offset0).
    let mut level_mode = 1;
    let ac_quant = self.ac_quant.get() as u32;
    for &pos in scan.iter().take(usize::from(eob)).skip(1) {
      let coeff = i32::cast_from(coeffs[pos as usize]) << self.log_tx_scale;
      let abs_coeff = coeff.unsigned_abs();

      // Truncated quotient, then one correction step: the chosen offset
      // decides whether the remainder rounds the level up by one.
      let level0 = divu_pair(abs_coeff, self.ac_mul_add);
      let offset = if level0 > 1 - level_mode {
        self.ac_offset1
      } else {
        self.ac_offset0
      };

      let abs_qcoeff: u32 =
        level0 + (abs_coeff + offset >= (level0 + 1) * ac_quant) as u32;
      if level_mode != 0 && abs_qcoeff == 0 {
        level_mode = 0;
      } else if abs_qcoeff > 1 {
        level_mode = 1;
      }

      qcoeffs[pos as usize] = T::cast_from(copysign(abs_qcoeff, coeff));
    }

    // The up-front eob estimate must match the actual last non-zero
    // quantized coefficient.
    debug_assert_eq!(
      usize::from(eob),
      scan
        .iter()
        .rposition(|&i| qcoeffs[i as usize] != T::cast_from(0))
        .map(|n| n + 1)
        .unwrap_or(0)
    );

    eob
  }
}
357
pub mod rust {
  use super::*;
  use crate::cpu_features::CpuFeatureLevel;
  use std::mem::MaybeUninit;

  /// Reference (non-SIMD) dequantizer: multiplies each quantized
  /// coefficient by its quantizer step (DC step for index 0, AC step
  /// otherwise) and undoes the `log_tx_scale` down-shift applied during
  /// quantization.  Writes results into the uninitialized `rcoeffs`.
  pub fn dequantize<T: Coefficient>(
    qindex: u8, coeffs: &[T], _eob: u16, rcoeffs: &mut [MaybeUninit<T>],
    tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
    _cpu: CpuFeatureLevel,
  ) {
    let log_tx_scale = get_log_tx_scale(tx_size) as i32;
    // Added before the right shift so negative products round toward zero.
    let offset = (1 << log_tx_scale) - 1;

    let dc_quant = dc_q(qindex, dc_delta_q, bit_depth).get() as i32;
    let ac_quant = ac_q(qindex, ac_delta_q, bit_depth).get() as i32;

    for (i, (r, c)) in rcoeffs
      .iter_mut()
      .zip(coeffs.iter().map(|&c| i32::cast_from(c)))
      .enumerate()
    {
      let quant = if i == 0 { dc_quant } else { ac_quant };
      // `(c >> 31)` is an arithmetic shift: all-ones when c is negative,
      // zero otherwise, so the rounding offset is applied only to
      // negative values.
      r.write(T::cast_from(
        (c * quant + ((c >> 31) & offset)) >> log_tx_scale,
      ));
    }
  }
}