1#[cfg(feature = "in_place")]
30use crate::InPlaceTransformExecutor;
31use crate::{CmsError, Layout, Matrix3, Matrix3f, TransformExecutor};
32use num_traits::AsPrimitive;
33use std::sync::Arc;
34
35pub(crate) struct TransformMatrixShaper<T: Clone, const BUCKET: usize> {
36 pub(crate) r_linear: Box<[f32; BUCKET]>,
37 pub(crate) g_linear: Box<[f32; BUCKET]>,
38 pub(crate) b_linear: Box<[f32; BUCKET]>,
39 pub(crate) r_gamma: Box<[T; 65536]>,
40 pub(crate) g_gamma: Box<[T; 65536]>,
41 pub(crate) b_gamma: Box<[T; 65536]>,
42 pub(crate) adaptation_matrix: Matrix3f,
43}
44
45impl<T: Clone, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
46 #[inline(never)]
47 #[allow(dead_code)]
48 fn convert_to_v(self) -> TransformMatrixShaperV<T> {
49 TransformMatrixShaperV {
50 r_linear: self.r_linear.iter().copied().collect(),
51 g_linear: self.g_linear.iter().copied().collect(),
52 b_linear: self.b_linear.iter().copied().collect(),
53 r_gamma: self.r_gamma,
54 g_gamma: self.g_gamma,
55 b_gamma: self.b_gamma,
56 adaptation_matrix: self.adaptation_matrix,
57 }
58 }
59}
60
61#[allow(dead_code)]
62pub(crate) struct TransformMatrixShaperV<T: Clone> {
63 pub(crate) r_linear: Vec<f32>,
64 pub(crate) g_linear: Vec<f32>,
65 pub(crate) b_linear: Vec<f32>,
66 pub(crate) r_gamma: Box<[T; 65536]>,
67 pub(crate) g_gamma: Box<[T; 65536]>,
68 pub(crate) b_gamma: Box<[T; 65536]>,
69 pub(crate) adaptation_matrix: Matrix3f,
70}
71
72pub(crate) struct TransformMatrixShaperOptimized<T: Clone, const BUCKET: usize> {
75 pub(crate) linear: Box<[f32; BUCKET]>,
76 pub(crate) gamma: Box<[T; 65536]>,
77 pub(crate) adaptation_matrix: Matrix3f,
78}
79
80#[allow(dead_code)]
81impl<T: Clone, const BUCKET: usize> TransformMatrixShaperOptimized<T, BUCKET> {
82 fn convert_to_v(self) -> TransformMatrixShaperOptimizedV<T> {
83 TransformMatrixShaperOptimizedV {
84 linear: self.linear.iter().copied().collect::<Vec<_>>(),
85 gamma: self.gamma,
86 adaptation_matrix: self.adaptation_matrix,
87 }
88 }
89}
90
91#[allow(dead_code)]
94pub(crate) struct TransformMatrixShaperOptimizedV<T: Clone> {
95 pub(crate) linear: Vec<f32>,
96 pub(crate) gamma: Box<[T; 65536]>,
97 pub(crate) adaptation_matrix: Matrix3f,
98}
99
100impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
101 #[inline(never)]
102 #[allow(dead_code)]
103 pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
104 &self,
105 gamma_lut: usize,
106 bit_depth: usize,
107 ) -> TransformMatrixShaperFp<R, T>
108 where
109 f32: AsPrimitive<R>,
110 {
111 let linear_scale = if T::FINITE {
112 let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
113 ((1 << bit_depth) - 1) as f32 * lut_scale
114 } else {
115 let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
116 (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
117 };
118 let new_box_r = self
119 .r_linear
120 .iter()
121 .map(|&x| (x * linear_scale).round().as_())
122 .collect::<Vec<R>>();
123 let new_box_g = self
124 .g_linear
125 .iter()
126 .map(|&x| (x * linear_scale).round().as_())
127 .collect::<Vec<R>>();
128 let new_box_b = self
129 .b_linear
130 .iter()
131 .map(|&x| (x * linear_scale).round().as_())
132 .collect::<Vec<_>>();
133 let scale: f32 = (1i32 << PRECISION) as f32;
134 let source_matrix = self.adaptation_matrix;
135 let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
136 for i in 0..3 {
137 for j in 0..3 {
138 dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
139 }
140 }
141 TransformMatrixShaperFp {
142 r_linear: new_box_r,
143 g_linear: new_box_g,
144 b_linear: new_box_b,
145 r_gamma: self.r_gamma.clone(),
146 g_gamma: self.g_gamma.clone(),
147 b_gamma: self.b_gamma.clone(),
148 adaptation_matrix: dst_matrix,
149 }
150 }
151}
152
153impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize>
154 TransformMatrixShaperOptimized<T, BUCKET>
155{
156 #[allow(dead_code)]
157 pub(crate) fn to_q2_13_n<
158 R: Copy + 'static + Default,
159 const PRECISION: i32,
160 const LINEAR_CAP: usize,
161 >(
162 &self,
163 gamma_lut: usize,
164 bit_depth: usize,
165 ) -> TransformMatrixShaperFixedPointOpt<R, i16, T, BUCKET>
166 where
167 f32: AsPrimitive<R>,
168 {
169 let linear_scale = if T::FINITE {
170 let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
171 ((1 << bit_depth) - 1) as f32 * lut_scale
172 } else {
173 let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
174 (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
175 };
176 let mut new_box_linear = Box::new([R::default(); BUCKET]);
177 for (dst, src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
178 *dst = (*src * linear_scale).round().as_();
179 }
180 let scale: f32 = (1i32 << PRECISION) as f32;
181 let source_matrix = self.adaptation_matrix;
182 let mut dst_matrix = Matrix3::<i16> {
183 v: [[i16::default(); 3]; 3],
184 };
185 for i in 0..3 {
186 for j in 0..3 {
187 dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
188 }
189 }
190 TransformMatrixShaperFixedPointOpt {
191 linear: new_box_linear,
192 gamma: self.gamma.clone(),
193 adaptation_matrix: dst_matrix,
194 }
195 }
196
197 #[allow(dead_code)]
198 pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
199 &self,
200 gamma_lut: usize,
201 bit_depth: usize,
202 ) -> TransformMatrixShaperFpOptVec<R, i16, T>
203 where
204 f32: AsPrimitive<R>,
205 {
206 let linear_scale = if T::FINITE {
207 let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
208 ((1 << bit_depth) - 1) as f32 * lut_scale
209 } else {
210 let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
211 (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
212 };
213 let new_box_linear = self
214 .linear
215 .iter()
216 .map(|&x| (x * linear_scale).round().as_())
217 .collect::<Vec<R>>();
218 let scale: f32 = (1i32 << PRECISION) as f32;
219 let source_matrix = self.adaptation_matrix;
220 let mut dst_matrix = Matrix3::<i16> {
221 v: [[i16::default(); 3]; 3],
222 };
223 for i in 0..3 {
224 for j in 0..3 {
225 dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
226 }
227 }
228 TransformMatrixShaperFpOptVec {
229 linear: new_box_linear,
230 gamma: self.gamma.clone(),
231 adaptation_matrix: dst_matrix,
232 }
233 }
234
235 #[cfg(all(target_arch = "aarch64", feature = "neon_shaper_fixed_point_paths"))]
236 pub(crate) fn to_q1_30_n<R: Copy + 'static + Default, const PRECISION: i32>(
237 &self,
238 gamma_lut: usize,
239 bit_depth: usize,
240 ) -> TransformMatrixShaperFpOptVec<R, i32, T>
241 where
242 f32: AsPrimitive<R>,
243 f64: AsPrimitive<R>,
244 {
245 let table_size = if T::FINITE {
247 (1 << bit_depth) - 1
248 } else {
249 T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
250 };
251 let ext_bp = if T::FINITE {
252 bit_depth as u32 + 1
253 } else {
254 let bp = (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1).count_ones();
255 bp + 1
256 };
257 let linear_scale = {
258 let lut_scale = (gamma_lut - 1) as f64 / table_size as f64;
259 ((1u32 << ext_bp) - 1) as f64 * lut_scale
260 };
261 let new_box_linear = self
262 .linear
263 .iter()
264 .map(|&v| (v as f64 * linear_scale).round().as_())
265 .collect::<Vec<R>>();
266 let scale: f64 = (1i64 << PRECISION) as f64;
267 let source_matrix = self.adaptation_matrix;
268 let mut dst_matrix = Matrix3::<i32> {
269 v: [[i32::default(); 3]; 3],
270 };
271 for i in 0..3 {
272 for j in 0..3 {
273 dst_matrix.v[i][j] = (source_matrix.v[i][j] as f64 * scale) as i32;
274 }
275 }
276 TransformMatrixShaperFpOptVec {
277 linear: new_box_linear,
278 gamma: self.gamma.clone(),
279 adaptation_matrix: dst_matrix,
280 }
281 }
282}
283
284#[allow(unused)]
285struct TransformMatrixShaperScalar<
286 T: Clone,
287 const SRC_LAYOUT: u8,
288 const DST_LAYOUT: u8,
289 const LINEAR_CAP: usize,
290> {
291 pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
292 pub(crate) gamma_lut: usize,
293 pub(crate) bit_depth: usize,
294}
295
296#[allow(unused)]
297struct TransformMatrixShaperOptScalar<
298 T: Clone,
299 const SRC_LAYOUT: u8,
300 const DST_LAYOUT: u8,
301 const LINEAR_CAP: usize,
302> {
303 pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
304 pub(crate) gamma_lut: usize,
305 pub(crate) bit_depth: usize,
306}
307
/// Generates a factory `$dep_name` that wraps a `$shaper<T, LINEAR_CAP>` profile in the
/// executor type `$dependant`, monomorphised for the requested source/destination layout.
///
/// Only RGB/RGBA combinations are supported; any other pair yields
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "aarch64"
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            match (src_layout, dst_layout) {
                (Layout::Rgba, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgba, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
377
/// Generates a factory `$dep_name` like `create_rgb_xyz_dependant_executor!`, except the
/// profile is first converted with `convert_to_v()` and the executor type `$dependant`
/// takes no table-length const parameter.
///
/// Only RGB/RGBA combinations are supported; any other pair yields
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "aarch64"
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor_to_v {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            // The conversion runs before the layout check, exactly once per call.
            let profile = profile.convert_to_v();
            match (src_layout, dst_layout) {
                (Layout::Rgba, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgba, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
444
/// Generates an in-place factory `$dep_name` for fixed-point executors.
///
/// The `$shaper<T, LINEAR_CAP>` profile is converted with
/// `to_q2_13_i::<$resolution, PRECISION>` and wrapped in `$dependant`, using the same
/// layout for source and destination. Layouts other than RGB/RGBA yield
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "aarch64"
))]
#[allow(unused)]
macro_rules! create_in_place_opt_rgb_xyz_fp_to_v {
    ($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
            const PRECISION: i32,
        >(
            layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn InPlaceTransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            // The fixed-point conversion runs before the layout check.
            let q2_13_profile = profile.to_q2_13_i::<$resolution, PRECISION>(gamma_lut, bit_depth);
            match layout {
                Layout::Rgba => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                })),
                Layout::Rgb => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
493
/// Generates an in-place factory `$dep_name` that wraps a `$shaper<T, LINEAR_CAP>` profile
/// in the executor type `$dependant`, using the same layout for source and destination.
///
/// Layouts other than RGB/RGBA yield `CmsError::UnsupportedProfileConnection`.
#[allow(unused)]
macro_rules! create_in_place_rgb_xyz {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn InPlaceTransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            match layout {
                Layout::Rgba => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                Layout::Rgb => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
536
537#[cfg(all(
538 any(target_arch = "x86", target_arch = "x86_64"),
539 feature = "sse_shaper_optimized_paths"
540))]
541use crate::conversions::sse::TransformShaperRgbOptSse;
542
543#[cfg(all(
544 any(target_arch = "x86", target_arch = "x86_64"),
545 feature = "sse_shaper_paths"
546))]
547use crate::conversions::sse::TransformShaperRgbSse;
548
549#[cfg(all(target_arch = "x86_64", feature = "avx_shaper_paths"))]
550use crate::conversions::avx::TransformShaperRgbAvx;
551#[cfg(all(target_arch = "x86_64", feature = "avx_shaper_optimized_paths"))]
552use crate::conversions::avx::TransformShaperRgbOptAvx;
553
// Expands to `make_rgb_xyz_rgb_transform_sse_41`, the factory that wraps a per-channel
// `TransformMatrixShaper` in `TransformShaperRgbSse`.
#[cfg(all(
    feature = "sse_shaper_paths",
    any(target_arch = "x86", target_arch = "x86_64")
))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_sse_41,
    TransformShaperRgbSse,
    TransformMatrixShaper
);
563
// Expands to `make_rgb_xyz_rgb_transform_sse_41_opt`, the factory that converts a
// shared-table `TransformMatrixShaperOptimized` to its `Vec` form and wraps it in
// `TransformShaperRgbOptSse`.
#[cfg(all(
    feature = "sse_shaper_optimized_paths",
    any(target_arch = "x86", target_arch = "x86_64")
))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_sse_41_opt,
    TransformShaperRgbOptSse,
    TransformMatrixShaperOptimized
);
573
// Expands to `make_rgb_xyz_rgb_transform_avx2`, the factory that wraps a per-channel
// `TransformMatrixShaper` in `TransformShaperRgbAvx`.
#[cfg(all(feature = "avx_shaper_paths", target_arch = "x86_64"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx2,
    TransformShaperRgbAvx,
    TransformMatrixShaper
);
580
// Expands to `make_rgb_xyz_rgb_transform_avx2_opt`, the factory that converts a
// shared-table `TransformMatrixShaperOptimized` to its `Vec` form and wraps it in
// `TransformShaperRgbOptAvx`.
#[cfg(all(feature = "avx_shaper_optimized_paths", target_arch = "x86_64"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_avx2_opt,
    TransformShaperRgbOptAvx,
    TransformMatrixShaperOptimized
);
587
588#[cfg(all(target_arch = "x86_64", feature = "avx512_shaper_optimized_paths"))]
589use crate::conversions::avx512::TransformShaperRgbOptAvx512;
590
// Expands to `make_rgb_xyz_rgb_transform_avx512_opt`, the factory that wraps a
// shared-table `TransformMatrixShaperOptimized` (array form, keeping `LINEAR_CAP`) in
// `TransformShaperRgbOptAvx512`.
#[cfg(all(feature = "avx512_shaper_optimized_paths", target_arch = "x86_64"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx512_opt,
    TransformShaperRgbOptAvx512,
    TransformMatrixShaperOptimized
);
597
598#[cfg(not(all(target_arch = "aarch64", feature = "neon_shaper_paths")))]
599pub(crate) fn make_rgb_xyz_rgb_transform<
600 T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
601 const LINEAR_CAP: usize,
602>(
603 src_layout: Layout,
604 dst_layout: Layout,
605 profile: TransformMatrixShaper<T, LINEAR_CAP>,
606 gamma_lut: usize,
607 bit_depth: usize,
608) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
609where
610 u32: AsPrimitive<T>,
611{
612 #[cfg(all(feature = "avx_shaper_paths", target_arch = "x86_64"))]
613 if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
614 return make_rgb_xyz_rgb_transform_avx2::<T, LINEAR_CAP>(
615 src_layout, dst_layout, profile, gamma_lut, bit_depth,
616 );
617 }
618 #[cfg(all(
619 feature = "sse_shaper_paths",
620 any(target_arch = "x86", target_arch = "x86_64")
621 ))]
622 if std::arch::is_x86_feature_detected!("sse4.1") {
623 return make_rgb_xyz_rgb_transform_sse_41::<T, LINEAR_CAP>(
624 src_layout, dst_layout, profile, gamma_lut, bit_depth,
625 );
626 }
627 if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
628 return Ok(Arc::new(TransformMatrixShaperScalar::<
629 T,
630 { Layout::Rgba as u8 },
631 { Layout::Rgba as u8 },
632 LINEAR_CAP,
633 > {
634 profile,
635 gamma_lut,
636 bit_depth,
637 }));
638 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
639 return Ok(Arc::new(TransformMatrixShaperScalar::<
640 T,
641 { Layout::Rgb as u8 },
642 { Layout::Rgba as u8 },
643 LINEAR_CAP,
644 > {
645 profile,
646 gamma_lut,
647 bit_depth,
648 }));
649 } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
650 return Ok(Arc::new(TransformMatrixShaperScalar::<
651 T,
652 { Layout::Rgba as u8 },
653 { Layout::Rgb as u8 },
654 LINEAR_CAP,
655 > {
656 profile,
657 gamma_lut,
658 bit_depth,
659 }));
660 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
661 return Ok(Arc::new(TransformMatrixShaperScalar::<
662 T,
663 { Layout::Rgb as u8 },
664 { Layout::Rgb as u8 },
665 LINEAR_CAP,
666 > {
667 profile,
668 gamma_lut,
669 bit_depth,
670 }));
671 }
672 Err(CmsError::UnsupportedProfileConnection)
673}
674
675#[cfg(not(all(target_arch = "aarch64", feature = "neon_shaper_optimized_paths")))]
676pub(crate) fn make_rgb_xyz_rgb_transform_opt<
677 T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
678 const LINEAR_CAP: usize,
679>(
680 src_layout: Layout,
681 dst_layout: Layout,
682 profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
683 gamma_lut: usize,
684 bit_depth: usize,
685) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
686where
687 u32: AsPrimitive<T>,
688{
689 #[cfg(all(feature = "avx512_shaper_optimized_paths", target_arch = "x86_64"))]
690 if std::arch::is_x86_feature_detected!("avx512bw")
691 && std::arch::is_x86_feature_detected!("avx512vl")
692 && std::arch::is_x86_feature_detected!("fma")
693 {
694 return make_rgb_xyz_rgb_transform_avx512_opt::<T, LINEAR_CAP>(
695 src_layout, dst_layout, profile, gamma_lut, bit_depth,
696 );
697 }
698 #[cfg(all(feature = "avx_shaper_optimized_paths", target_arch = "x86_64"))]
699 if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
700 return make_rgb_xyz_rgb_transform_avx2_opt::<T, LINEAR_CAP>(
701 src_layout, dst_layout, profile, gamma_lut, bit_depth,
702 );
703 }
704 #[cfg(all(
705 feature = "sse_shaper_optimized_paths",
706 any(target_arch = "x86", target_arch = "x86_64")
707 ))]
708 if std::arch::is_x86_feature_detected!("sse4.1") {
709 return make_rgb_xyz_rgb_transform_sse_41_opt::<T, LINEAR_CAP>(
710 src_layout, dst_layout, profile, gamma_lut, bit_depth,
711 );
712 }
713 if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
714 return Ok(Arc::new(TransformMatrixShaperOptScalar::<
715 T,
716 { Layout::Rgba as u8 },
717 { Layout::Rgba as u8 },
718 LINEAR_CAP,
719 > {
720 profile,
721 gamma_lut,
722 bit_depth,
723 }));
724 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
725 return Ok(Arc::new(TransformMatrixShaperOptScalar::<
726 T,
727 { Layout::Rgb as u8 },
728 { Layout::Rgba as u8 },
729 LINEAR_CAP,
730 > {
731 profile,
732 gamma_lut,
733 bit_depth,
734 }));
735 } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
736 return Ok(Arc::new(TransformMatrixShaperOptScalar::<
737 T,
738 { Layout::Rgba as u8 },
739 { Layout::Rgb as u8 },
740 LINEAR_CAP,
741 > {
742 profile,
743 gamma_lut,
744 bit_depth,
745 }));
746 } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
747 return Ok(Arc::new(TransformMatrixShaperOptScalar::<
748 T,
749 { Layout::Rgb as u8 },
750 { Layout::Rgb as u8 },
751 LINEAR_CAP,
752 > {
753 profile,
754 gamma_lut,
755 bit_depth,
756 }));
757 }
758 Err(CmsError::UnsupportedProfileConnection)
759}
760
761#[cfg(all(target_arch = "aarch64", feature = "neon_shaper_paths"))]
762use crate::conversions::neon::TransformShaperRgbNeon;
763#[cfg(all(target_arch = "aarch64", feature = "neon_shaper_optimized_paths"))]
764use crate::conversions::neon::TransformShaperRgbOptNeon;
765use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec;
766use crate::conversions::rgbxyz_fixed::{
767 TransformMatrixShaperFixedPointOpt, TransformMatrixShaperFp,
768};
769use crate::transform::PointeeSizeExpressible;
770
// On aarch64 with `neon_shaper_paths`, `make_rgb_xyz_rgb_transform` is generated here
// (the generic definition is compiled out): the per-channel profile is converted to its
// `Vec` form and wrapped in `TransformShaperRgbNeon`.
#[cfg(all(feature = "neon_shaper_paths", target_arch = "aarch64"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform,
    TransformShaperRgbNeon,
    TransformMatrixShaper
);
777
// Expands to `make_in_place_rgb_xyz_transform`: the in-place factory that wraps a
// per-channel `TransformMatrixShaper` in the scalar executor.
#[cfg(feature = "in_place")]
create_in_place_rgb_xyz!(
    make_in_place_rgb_xyz_transform,
    TransformMatrixShaperScalar,
    TransformMatrixShaper
);
784
// On aarch64 with `neon_shaper_optimized_paths`, `make_rgb_xyz_rgb_transform_opt` is
// generated here (the generic definition is compiled out): the shared-table profile is
// converted to its `Vec` form and wrapped in `TransformShaperRgbOptNeon`.
#[cfg(all(feature = "neon_shaper_optimized_paths", target_arch = "aarch64"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_opt,
    TransformShaperRgbOptNeon,
    TransformMatrixShaperOptimized
);
791
// Expands to `make_rgb_xyz_in_place_transform_opt`: the in-place factory that wraps a
// shared-table `TransformMatrixShaperOptimized` in the scalar executor.
#[cfg(feature = "in_place")]
create_in_place_rgb_xyz!(
    make_rgb_xyz_in_place_transform_opt,
    TransformMatrixShaperOptScalar,
    TransformMatrixShaperOptimized
);
798
799#[cfg(all(
800 target_arch = "aarch64",
801 feature = "in_place",
802 feature = "neon_shaper_fixed_point_paths"
803))]
804use crate::conversions::neon::TransformShaperQ2_13NeonOpt;
805
// Expands to `make_rgb_xyz_in_place_transform_q2_13_opt`: the in-place fixed-point
// factory for NEON, with the linear table quantised to `i16`.
#[cfg(all(
    feature = "in_place",
    feature = "neon_shaper_fixed_point_paths",
    target_arch = "aarch64"
))]
create_in_place_opt_rgb_xyz_fp_to_v!(
    make_rgb_xyz_in_place_transform_q2_13_opt,
    TransformShaperQ2_13NeonOpt,
    i16,
    TransformMatrixShaperOptimized
);
817
818#[cfg(all(
819 any(target_arch = "x86_64", target_arch = "x86"),
820 feature = "in_place",
821 feature = "sse_shaper_fixed_point_paths"
822))]
823use crate::conversions::sse::TransformShaperQ2_13OptSse;
824
// Expands to `make_sse_rgb_xyz_in_place_transform_q2_13_opt`: the in-place fixed-point
// factory for SSE, with the linear table quantised to `i32`.
#[cfg(all(
    feature = "in_place",
    feature = "sse_shaper_fixed_point_paths",
    any(target_arch = "x86", target_arch = "x86_64")
))]
create_in_place_opt_rgb_xyz_fp_to_v!(
    make_sse_rgb_xyz_in_place_transform_q2_13_opt,
    TransformShaperQ2_13OptSse,
    i32,
    TransformMatrixShaperOptimized
);
836
837#[cfg(all(
838 target_arch = "x86_64",
839 feature = "in_place",
840 feature = "avx_shaper_fixed_point_paths"
841))]
842use crate::conversions::avx::TransformShaperRgbQ2_13OptAvx;
843
// Expands to `make_avx_rgb_xyz_in_place_transform_q2_13_opt`: the in-place fixed-point
// factory for AVX, with the linear table quantised to `i32`.
#[cfg(all(
    feature = "in_place",
    feature = "avx_shaper_fixed_point_paths",
    target_arch = "x86_64"
))]
create_in_place_opt_rgb_xyz_fp_to_v!(
    make_avx_rgb_xyz_in_place_transform_q2_13_opt,
    TransformShaperRgbQ2_13OptAvx,
    i32,
    TransformMatrixShaperOptimized
);
855
856#[allow(unused)]
857impl<
858 T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
859 const SRC_LAYOUT: u8,
860 const DST_LAYOUT: u8,
861 const LINEAR_CAP: usize,
862> TransformExecutor<T> for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
863where
864 u32: AsPrimitive<T>,
865{
866 fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
867 use crate::mlaf::mlaf;
868 let src_cn = Layout::from(SRC_LAYOUT);
869 let dst_cn = Layout::from(DST_LAYOUT);
870 let src_channels = src_cn.channels();
871 let dst_channels = dst_cn.channels();
872
873 if src.len() / src_channels != dst.len() / dst_channels {
874 return Err(CmsError::LaneSizeMismatch);
875 }
876 if src.len() % src_channels != 0 {
877 return Err(CmsError::LaneMultipleOfChannels);
878 }
879 if dst.len() % dst_channels != 0 {
880 return Err(CmsError::LaneMultipleOfChannels);
881 }
882
883 let transform = self.profile.adaptation_matrix;
884 let scale = (self.gamma_lut - 1) as f32;
885 let max_colors: T = ((1 << self.bit_depth) - 1).as_();
886
887 for (src, dst) in src
888 .chunks_exact(src_channels)
889 .zip(dst.chunks_exact_mut(dst_channels))
890 {
891 let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
892 let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
893 let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
894 let a = if src_channels == 4 {
895 src[src_cn.a_i()]
896 } else {
897 max_colors
898 };
899
900 let new_r = mlaf(
901 0.5f32,
902 mlaf(
903 mlaf(r * transform.v[0][0], g, transform.v[0][1]),
904 b,
905 transform.v[0][2],
906 )
907 .max(0f32)
908 .min(1f32),
909 scale,
910 );
911
912 let new_g = mlaf(
913 0.5f32,
914 mlaf(
915 mlaf(r * transform.v[1][0], g, transform.v[1][1]),
916 b,
917 transform.v[1][2],
918 )
919 .max(0f32)
920 .min(1f32),
921 scale,
922 );
923
924 let new_b = mlaf(
925 0.5f32,
926 mlaf(
927 mlaf(r * transform.v[2][0], g, transform.v[2][1]),
928 b,
929 transform.v[2][2],
930 )
931 .max(0f32)
932 .min(1f32),
933 scale,
934 );
935
936 dst[dst_cn.r_i()] = self.profile.r_gamma[(new_r as u16) as usize];
937 dst[dst_cn.g_i()] = self.profile.g_gamma[(new_g as u16) as usize];
938 dst[dst_cn.b_i()] = self.profile.b_gamma[(new_b as u16) as usize];
939 if dst_channels == 4 {
940 dst[dst_cn.a_i()] = a;
941 }
942 }
943
944 Ok(())
945 }
946}
947
#[cfg(feature = "in_place")]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> InPlaceTransformExecutor<T> for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
where
    u32: AsPrimitive<T>,
{
    /// Converts every pixel of `dst` in place with per-channel tables.
    ///
    /// Each colour channel is linearised through its own table, the 3x3 matrix is applied,
    /// the result is clamped to `[0, 1]`, scaled by `gamma_lut - 1` (plus 0.5 for rounding)
    /// and used as an index into that channel's gamma table. The alpha sample, when the
    /// layout has one, is left untouched.
    ///
    /// # Errors
    ///
    /// Returns `CmsError::LaneMultipleOfChannels` if `dst.len()` is not a whole number of
    /// pixels.
    ///
    /// # Panics
    ///
    /// Panics if `SRC_LAYOUT != DST_LAYOUT`, or if a sample's `_as_usize()` value is not
    /// below `LINEAR_CAP`.
    fn transform(&self, dst: &mut [T]) -> Result<(), CmsError> {
        use crate::mlaf::mlaf;
        assert_eq!(
            SRC_LAYOUT, DST_LAYOUT,
            "This is in-place transform, layout must not diverge"
        );
        let cn = Layout::from(SRC_LAYOUT);
        let channels = cn.channels();

        if dst.len() % channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let matrix = self.profile.adaptation_matrix;
        let lut_max = (self.gamma_lut - 1) as f32;

        // Gamma-table index for one output channel: matrix row `row` applied to (r, g, b).
        // `mlaf(acc, a, b)` is presumably a multiply-add, `acc + a * b`.
        let gamma_index = |r: f32, g: f32, b: f32, row: usize| -> usize {
            let mixed = mlaf(
                mlaf(r * matrix.v[row][0], g, matrix.v[row][1]),
                b,
                matrix.v[row][2],
            );
            mlaf(0.5f32, mixed.max(0f32).min(1f32), lut_max) as u16 as usize
        };

        for px in dst.chunks_exact_mut(channels) {
            // Read all three inputs before writing: every output depends on every input.
            let r = self.profile.r_linear[px[cn.r_i()]._as_usize()];
            let g = self.profile.g_linear[px[cn.g_i()]._as_usize()];
            let b = self.profile.b_linear[px[cn.b_i()]._as_usize()];

            let r_idx = gamma_index(r, g, b, 0);
            let g_idx = gamma_index(r, g, b, 1);
            let b_idx = gamma_index(r, g, b, 2);

            px[cn.r_i()] = self.profile.r_gamma[r_idx];
            px[cn.g_i()] = self.profile.g_gamma[g_idx];
            px[cn.b_i()] = self.profile.b_gamma[b_idx];
            // Alpha already sits in its slot, so there is nothing to copy for it.
        }

        Ok(())
    }
}
1032
1033#[allow(unused)]
1034impl<
1035 T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
1036 const SRC_LAYOUT: u8,
1037 const DST_LAYOUT: u8,
1038 const LINEAR_CAP: usize,
1039> TransformExecutor<T> for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
1040where
1041 u32: AsPrimitive<T>,
1042{
1043 fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
1044 use crate::mlaf::mlaf;
1045 let src_cn = Layout::from(SRC_LAYOUT);
1046 let dst_cn = Layout::from(DST_LAYOUT);
1047 let src_channels = src_cn.channels();
1048 let dst_channels = dst_cn.channels();
1049
1050 if src.len() / src_channels != dst.len() / dst_channels {
1051 return Err(CmsError::LaneSizeMismatch);
1052 }
1053 if src.len() % src_channels != 0 {
1054 return Err(CmsError::LaneMultipleOfChannels);
1055 }
1056 if dst.len() % dst_channels != 0 {
1057 return Err(CmsError::LaneMultipleOfChannels);
1058 }
1059
1060 let transform = self.profile.adaptation_matrix;
1061 let scale = (self.gamma_lut - 1) as f32;
1062 let max_colors: T = ((1 << self.bit_depth) - 1).as_();
1063
1064 for (src, dst) in src
1065 .chunks_exact(src_channels)
1066 .zip(dst.chunks_exact_mut(dst_channels))
1067 {
1068 let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
1069 let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
1070 let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
1071 let a = if src_channels == 4 {
1072 src[src_cn.a_i()]
1073 } else {
1074 max_colors
1075 };
1076
1077 let new_r = mlaf(
1078 0.5f32,
1079 mlaf(
1080 mlaf(r * transform.v[0][0], g, transform.v[0][1]),
1081 b,
1082 transform.v[0][2],
1083 )
1084 .max(0f32)
1085 .min(1f32),
1086 scale,
1087 );
1088
1089 let new_g = mlaf(
1090 0.5f32,
1091 mlaf(
1092 mlaf(r * transform.v[1][0], g, transform.v[1][1]),
1093 b,
1094 transform.v[1][2],
1095 )
1096 .max(0f32)
1097 .min(1f32),
1098 scale,
1099 );
1100
1101 let new_b = mlaf(
1102 0.5f32,
1103 mlaf(
1104 mlaf(r * transform.v[2][0], g, transform.v[2][1]),
1105 b,
1106 transform.v[2][2],
1107 )
1108 .max(0f32)
1109 .min(1f32),
1110 scale,
1111 );
1112
1113 dst[dst_cn.r_i()] = self.profile.gamma[(new_r as u16) as usize];
1114 dst[dst_cn.g_i()] = self.profile.gamma[(new_g as u16) as usize];
1115 dst[dst_cn.b_i()] = self.profile.gamma[(new_b as u16) as usize];
1116 if dst_channels == 4 {
1117 dst[dst_cn.a_i()] = a;
1118 }
1119 }
1120
1121 Ok(())
1122 }
1123}
1124
#[cfg(feature = "in_place")]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> InPlaceTransformExecutor<T>
    for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
where
    u32: AsPrimitive<T>,
{
    /// Converts every pixel of `dst` in place with the shared tables.
    ///
    /// Each colour channel is linearised through the shared table, the 3x3 matrix is
    /// applied, the result is clamped to `[0, 1]`, scaled by `gamma_lut - 1` (plus 0.5 for
    /// rounding) and used as an index into the shared gamma table. The alpha sample, when
    /// the layout has one, is left untouched.
    ///
    /// # Errors
    ///
    /// Returns `CmsError::LaneMultipleOfChannels` if `dst.len()` is not a whole number of
    /// pixels.
    ///
    /// # Panics
    ///
    /// Panics if `SRC_LAYOUT != DST_LAYOUT`, or if a sample's `_as_usize()` value is not
    /// below `LINEAR_CAP`.
    fn transform(&self, dst: &mut [T]) -> Result<(), CmsError> {
        use crate::mlaf::mlaf;
        assert_eq!(
            SRC_LAYOUT, DST_LAYOUT,
            "This is in-place transform, layout must not diverge"
        );
        let cn = Layout::from(DST_LAYOUT);
        let channels = cn.channels();

        if dst.len() % channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let matrix = self.profile.adaptation_matrix;
        let lut_max = (self.gamma_lut - 1) as f32;

        // Gamma-table index for one output channel: matrix row `row` applied to (r, g, b).
        // `mlaf(acc, a, b)` is presumably a multiply-add, `acc + a * b`.
        let gamma_index = |r: f32, g: f32, b: f32, row: usize| -> usize {
            let mixed = mlaf(
                mlaf(r * matrix.v[row][0], g, matrix.v[row][1]),
                b,
                matrix.v[row][2],
            );
            mlaf(0.5f32, mixed.max(0f32).min(1f32), lut_max) as u16 as usize
        };

        for px in dst.chunks_exact_mut(channels) {
            // Read all three inputs before writing: every output depends on every input.
            let r = self.profile.linear[px[cn.r_i()]._as_usize()];
            let g = self.profile.linear[px[cn.g_i()]._as_usize()];
            let b = self.profile.linear[px[cn.b_i()]._as_usize()];

            let r_idx = gamma_index(r, g, b, 0);
            let g_idx = gamma_index(r, g, b, 1);
            let b_idx = gamma_index(r, g, b, 2);

            px[cn.r_i()] = self.profile.gamma[r_idx];
            px[cn.g_i()] = self.profile.gamma[g_idx];
            px[cn.b_i()] = self.profile.gamma[b_idx];
            // Alpha already sits in its slot, so there is nothing to copy for it.
        }

        Ok(())
    }
}