1#![cfg(feature = "lut")]
30#![allow(dead_code)]
31use crate::conversions::lut_transforms::LUT_SAMPLING;
32use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
33use crate::mlaf::fmla;
34use crate::{Vector3f, Vector4f};
35use std::ops::{Add, Mul, Sub};
36
/// Tetrahedral interpolator: splits each LUT cell into six tetrahedra
/// selected by the ordering of the fractional weights.
#[cfg(feature = "options")]
pub(crate) struct Tetrahedral<const GRID_SIZE: usize> {}

/// Pyramidal interpolator: splits each LUT cell into three pyramids
/// selected by the smallest fractional weight.
#[cfg(feature = "options")]
pub(crate) struct Pyramidal<const GRID_SIZE: usize> {}

/// Prismatic interpolator: splits each LUT cell into two triangular prisms.
#[cfg(feature = "options")]
pub(crate) struct Prismatic<const GRID_SIZE: usize> {}

/// Trilinear interpolator: blends all eight corners of a LUT cell.
pub(crate) struct Trilinear<const GRID_SIZE: usize> {}
47
/// Precomputed lattice lookup for one channel: the two neighbouring grid
/// node indices and the interpolation weight between them.
#[derive(Debug, Copy, Clone, Default)]
pub(crate) struct BarycentricWeight<V> {
    // Lower grid node index along this axis.
    pub x: i32,
    // Upper grid node index, clamped to the last node of the grid.
    pub x_n: i32,
    // Fractional weight toward `x_n` (f32, or Q15-quantized i16).
    pub w: V,
}
54
55impl BarycentricWeight<f32> {
56 pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<f32>; 256]>
57 {
58 let mut weights = Box::new([BarycentricWeight::default(); 256]);
59 for (index, weight) in weights.iter_mut().enumerate() {
60 const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
61 let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
62
63 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
64
65 let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
66
67 let dr = fmla(index as f32, scale, -x as f32);
68 *weight = BarycentricWeight { x, x_n, w: dr };
69 }
70 weights
71 }
72
73 #[cfg(feature = "options")]
74 pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
75 -> Box<[BarycentricWeight<f32>; 65536]> {
76 let mut weights = Box::new([BarycentricWeight::<f32>::default(); 65536]);
77 let b_scale: f32 = 1.0 / (BINS - 1) as f32;
78 for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
79 let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
80
81 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
82
83 let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
84
85 let dr = fmla(index as f32, scale, -x as f32);
86 *weight = BarycentricWeight { x, x_n, w: dr };
87 }
88 weights
89 }
90}
91
92#[allow(dead_code)]
93impl BarycentricWeight<i16> {
94 pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<i16>; 256]>
95 {
96 let mut weights = Box::new([BarycentricWeight::default(); 256]);
97 for (index, weight) in weights.iter_mut().enumerate() {
98 const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
99 let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
100
101 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
102
103 let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
104
105 const Q: f32 = ((1i32 << 15) - 1) as f32;
106
107 let dr = (fmla(index as f32, scale, -x as f32) * Q)
108 .round()
109 .min(i16::MAX as f32)
110 .max(-i16::MAX as f32) as i16;
111 *weight = BarycentricWeight { x, x_n, w: dr };
112 }
113 weights
114 }
115
116 #[cfg(feature = "options")]
117 pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
118 -> Box<[BarycentricWeight<i16>; 65536]> {
119 let mut weights = Box::new([BarycentricWeight::<i16>::default(); 65536]);
120 let b_scale: f32 = 1.0 / (BINS - 1) as f32;
121 for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
122 let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
123
124 let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
125
126 let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
127
128 const Q: f32 = ((1i32 << 15) - 1) as f32;
129
130 let dr = (fmla(index as f32, scale, -x as f32) * Q)
131 .round()
132 .min(i16::MAX as f32)
133 .max(-i16::MAX as f32) as i16;
134 *weight = BarycentricWeight { x, x_n, w: dr };
135 }
136 weights
137 }
138}
139
/// Reads the LUT value stored at grid node `(x, y, z)`.
trait Fetcher<T> {
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
143
/// Borrowed view over a 3-component LUT laid out as `GRID_SIZE`^3 nodes of
/// three consecutive `f32` values each.
struct TetrahedralFetchVector3f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}
147
/// 3D-LUT interpolation over a cube of grid nodes, addressed by per-channel
/// barycentric weights.
pub(crate) trait MultidimensionalInterpolation {
    /// Interpolates a 3-component output from `cube` at the cell selected by
    /// the three channel weights.
    fn inter3(
        &self,
        cube: &[f32],
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
    ) -> Vector3f;
    /// Interpolates a 4-component output from `cube` at the cell selected by
    /// the three channel weights.
    fn inter4(
        &self,
        cube: &[f32],
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
    ) -> Vector4f;
}
164
165impl<const GRID_SIZE: usize> Fetcher<Vector3f> for TetrahedralFetchVector3f<'_, GRID_SIZE> {
166 #[inline(always)]
167 fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
168 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
169 + y as u32 * GRID_SIZE as u32
170 + z as u32) as usize
171 * 3;
172 let jx = &self.cube[offset..offset + 3];
173 Vector3f {
174 v: [jx[0], jx[1], jx[2]],
175 }
176 }
177}
178
/// Borrowed view over a 4-component LUT laid out as `GRID_SIZE`^3 nodes of
/// four consecutive `f32` values each.
struct TetrahedralFetchVector4f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}
182
183impl<const GRID_SIZE: usize> Fetcher<Vector4f> for TetrahedralFetchVector4f<'_, GRID_SIZE> {
184 #[inline(always)]
185 fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
186 let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
187 + y as u32 * GRID_SIZE as u32
188 + z as u32) as usize
189 * 4;
190 let jx = &self.cube[offset..offset + 4];
191 Vector4f {
192 v: [jx[0], jx[1], jx[2], jx[3]],
193 }
194 }
195}
196
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Tetrahedral<GRID_SIZE> {
    /// Tetrahedral interpolation inside one LUT cell.
    ///
    /// The cell between the lower node `(x, y, z)` and the upper node
    /// `(x_n, y_n, z_n)` is split into six tetrahedra, selected by the
    /// ordering of the fractional weights `(rx, ry, rz)`. The result is
    /// `c0 + rx*c1 + ry*c2 + rz*c3` for the selected tetrahedron, evaluated
    /// with fused multiply-adds.
    #[inline]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
        r: impl Fetcher<T>,
    ) -> T {
        // Lower grid nodes per axis.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        // Upper (clamped) grid nodes per axis.
        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Fractional position inside the cell along each axis.
        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);
        let c2;
        let c1;
        let c3;
        if rx >= ry {
            if ry >= rz {
                // rx >= ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                // rx >= rz > ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                // rz > rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            // ry > rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            // ry >= rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            // rz > ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        // Accumulate c0 + rx*c1 + ry*c2 + rz*c3 with fused multiply-adds.
        let s0 = c0.mla(c1, T::from(rx));
        let s1 = s0.mla(c2, T::from(ry));
        s1.mla(c3, T::from(rz))
    }
}
269
/// Implements `MultidimensionalInterpolation` for an interpolator type by
/// forwarding `inter3`/`inter4` to its inherent `interpolate` method with the
/// matching 3- or 4-component fetcher over `cube`.
macro_rules! define_md_inter {
    ($interpolator: ident) => {
        impl<const GRID_SIZE: usize> MultidimensionalInterpolation for $interpolator<GRID_SIZE> {
            fn inter3(
                &self,
                cube: &[f32],
                lut_r: &BarycentricWeight<f32>,
                lut_g: &BarycentricWeight<f32>,
                lut_b: &BarycentricWeight<f32>,
            ) -> Vector3f {
                self.interpolate::<Vector3f>(
                    lut_r,
                    lut_g,
                    lut_b,
                    TetrahedralFetchVector3f::<GRID_SIZE> { cube },
                )
            }

            fn inter4(
                &self,
                cube: &[f32],
                lut_r: &BarycentricWeight<f32>,
                lut_g: &BarycentricWeight<f32>,
                lut_b: &BarycentricWeight<f32>,
            ) -> Vector4f {
                self.interpolate::<Vector4f>(
                    lut_r,
                    lut_g,
                    lut_b,
                    TetrahedralFetchVector4f::<GRID_SIZE> { cube },
                )
            }
        }
    };
}
305
// Tetrahedral, pyramidal and prismatic interpolators are only compiled with
// the `options` feature; trilinear is always available.
#[cfg(feature = "options")]
define_md_inter!(Tetrahedral);
#[cfg(feature = "options")]
define_md_inter!(Pyramidal);
#[cfg(feature = "options")]
define_md_inter!(Prismatic);
define_md_inter!(Trilinear);
313
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Pyramidal<GRID_SIZE> {
    /// Pyramidal interpolation inside one LUT cell.
    ///
    /// The cell is split into three pyramids; the branch is chosen by which
    /// fractional weight is the smallest. Each branch evaluates
    /// `c0 + w_a*c1 + w_b*c2 + w_c*c3 + w_bilinear*c4` with fused
    /// multiply-adds, where `c4` is the bilinear correction term.
    #[inline]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
        r: impl Fetcher<T>,
    ) -> T {
        // Lower grid nodes per axis.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        // Upper (clamped) grid nodes per axis.
        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Fractional position inside the cell along each axis.
        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if dr > db && dg > db {
            // db is strictly the smallest weight.
            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            // Bilinear correction for the (dr, dg) face.
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dr * dg))
        } else if db > dr && dg > dr {
            // dr is strictly the smallest weight.
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            // Bilinear correction for the (dg, db) face.
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dg * db))
        } else {
            // Remaining pyramid (dg is the smallest, or weights tie).
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            // Bilinear correction for the (db, dr) face.
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(db * dr))
        }
    }
}
394
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Prismatic<GRID_SIZE> {
    /// Prismatic interpolation inside one LUT cell.
    ///
    /// The cell is split into two triangular prisms along the `dr == db`
    /// diagonal plane. Each branch evaluates
    /// `c0 + db*c1 + dr*c2 + dg*c3 + dg*db*c4 + dr*dg*c5` with fused
    /// multiply-adds.
    #[inline(always)]
    fn interpolate<
        T: Copy
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Mul<f32, Output = T>
            + Add<T, Output = T>
            + From<f32>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lut_r: &BarycentricWeight<f32>,
        lut_g: &BarycentricWeight<f32>,
        lut_b: &BarycentricWeight<f32>,
        r: impl Fetcher<T>,
    ) -> T {
        // Lower grid nodes per axis.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        // Upper (clamped) grid nodes per axis.
        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Fractional position inside the cell along each axis.
        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if db >= dr {
            // Prism on the db >= dr side of the diagonal plane.
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            // Bilinear correction terms for the prism's quad faces.
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        } else {
            // Prism on the dr > db side of the diagonal plane.
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            // Bilinear correction terms for the prism's quad faces.
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        }
    }
}
466
467impl<const GRID_SIZE: usize> Trilinear<GRID_SIZE> {
468 #[inline(always)]
469 fn interpolate<
470 T: Copy
471 + Sub<T, Output = T>
472 + Mul<T, Output = T>
473 + Mul<f32, Output = T>
474 + Add<T, Output = T>
475 + From<f32>
476 + FusedMultiplyAdd<T>
477 + FusedMultiplyNegAdd<T>,
478 >(
479 &self,
480 lut_r: &BarycentricWeight<f32>,
481 lut_g: &BarycentricWeight<f32>,
482 lut_b: &BarycentricWeight<f32>,
483 r: impl Fetcher<T>,
484 ) -> T {
485 let x: i32 = lut_r.x;
486 let y: i32 = lut_g.x;
487 let z: i32 = lut_b.x;
488
489 let x_n: i32 = lut_r.x_n;
490 let y_n: i32 = lut_g.x_n;
491 let z_n: i32 = lut_b.x_n;
492
493 let dr = lut_r.w;
494 let dg = lut_g.w;
495 let db = lut_b.w;
496
497 let w0 = T::from(dr);
498 let w1 = T::from(dg);
499 let w2 = T::from(db);
500
501 let c000 = r.fetch(x, y, z);
502 let c100 = r.fetch(x_n, y, z);
503 let c010 = r.fetch(x, y_n, z);
504 let c110 = r.fetch(x_n, y_n, z);
505 let c001 = r.fetch(x, y, z_n);
506 let c101 = r.fetch(x_n, y, z_n);
507 let c011 = r.fetch(x, y_n, z_n);
508 let c111 = r.fetch(x_n, y_n, z_n);
509
510 let dx = T::from(dr);
511
512 let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
513 let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
514 let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
515 let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
516
517 let dy = T::from(dg);
518
519 let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
520 let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
521
522 let dz = T::from(db);
523
524 c0.neg_mla(c0, dz).mla(c1, w2)
525 }
526}