Skip to main content

moxcms/conversions/
rgbxyz.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29#[cfg(feature = "in_place")]
30use crate::InPlaceTransformExecutor;
31use crate::{CmsError, Layout, Matrix3, Matrix3f, TransformExecutor};
32use num_traits::AsPrimitive;
33use std::sync::Arc;
34
35pub(crate) struct TransformMatrixShaper<T: Clone, const BUCKET: usize> {
36    pub(crate) r_linear: Box<[f32; BUCKET]>,
37    pub(crate) g_linear: Box<[f32; BUCKET]>,
38    pub(crate) b_linear: Box<[f32; BUCKET]>,
39    pub(crate) r_gamma: Box<[T; 65536]>,
40    pub(crate) g_gamma: Box<[T; 65536]>,
41    pub(crate) b_gamma: Box<[T; 65536]>,
42    pub(crate) adaptation_matrix: Matrix3f,
43}
44
45impl<T: Clone, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
46    #[inline(never)]
47    #[allow(dead_code)]
48    fn convert_to_v(self) -> TransformMatrixShaperV<T> {
49        TransformMatrixShaperV {
50            r_linear: self.r_linear.iter().copied().collect(),
51            g_linear: self.g_linear.iter().copied().collect(),
52            b_linear: self.b_linear.iter().copied().collect(),
53            r_gamma: self.r_gamma,
54            g_gamma: self.g_gamma,
55            b_gamma: self.b_gamma,
56            adaptation_matrix: self.adaptation_matrix,
57        }
58    }
59}
60
61#[allow(dead_code)]
62pub(crate) struct TransformMatrixShaperV<T: Clone> {
63    pub(crate) r_linear: Vec<f32>,
64    pub(crate) g_linear: Vec<f32>,
65    pub(crate) b_linear: Vec<f32>,
66    pub(crate) r_gamma: Box<[T; 65536]>,
67    pub(crate) g_gamma: Box<[T; 65536]>,
68    pub(crate) b_gamma: Box<[T; 65536]>,
69    pub(crate) adaptation_matrix: Matrix3f,
70}
71
72/// Low memory footprint optimized routine for matrix shaper profiles with the same
73/// Gamma and linear curves.
74pub(crate) struct TransformMatrixShaperOptimized<T: Clone, const BUCKET: usize> {
75    pub(crate) linear: Box<[f32; BUCKET]>,
76    pub(crate) gamma: Box<[T; 65536]>,
77    pub(crate) adaptation_matrix: Matrix3f,
78}
79
80#[allow(dead_code)]
81impl<T: Clone, const BUCKET: usize> TransformMatrixShaperOptimized<T, BUCKET> {
82    fn convert_to_v(self) -> TransformMatrixShaperOptimizedV<T> {
83        TransformMatrixShaperOptimizedV {
84            linear: self.linear.iter().copied().collect::<Vec<_>>(),
85            gamma: self.gamma,
86            adaptation_matrix: self.adaptation_matrix,
87        }
88    }
89}
90
91/// Low memory footprint optimized routine for matrix shaper profiles with the same
92/// Gamma and linear curves.
93#[allow(dead_code)]
94pub(crate) struct TransformMatrixShaperOptimizedV<T: Clone> {
95    pub(crate) linear: Vec<f32>,
96    pub(crate) gamma: Box<[T; 65536]>,
97    pub(crate) adaptation_matrix: Matrix3f,
98}
99
100impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
101    #[inline(never)]
102    #[allow(dead_code)]
103    pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
104        &self,
105        gamma_lut: usize,
106        bit_depth: usize,
107    ) -> TransformMatrixShaperFp<R, T>
108    where
109        f32: AsPrimitive<R>,
110    {
111        let linear_scale = if T::FINITE {
112            let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
113            ((1 << bit_depth) - 1) as f32 * lut_scale
114        } else {
115            let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
116            (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
117        };
118        let new_box_r = self
119            .r_linear
120            .iter()
121            .map(|&x| (x * linear_scale).round().as_())
122            .collect::<Vec<R>>();
123        let new_box_g = self
124            .g_linear
125            .iter()
126            .map(|&x| (x * linear_scale).round().as_())
127            .collect::<Vec<R>>();
128        let new_box_b = self
129            .b_linear
130            .iter()
131            .map(|&x| (x * linear_scale).round().as_())
132            .collect::<Vec<_>>();
133        let scale: f32 = (1i32 << PRECISION) as f32;
134        let source_matrix = self.adaptation_matrix;
135        let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
136        for i in 0..3 {
137            for j in 0..3 {
138                dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
139            }
140        }
141        TransformMatrixShaperFp {
142            r_linear: new_box_r,
143            g_linear: new_box_g,
144            b_linear: new_box_b,
145            r_gamma: self.r_gamma.clone(),
146            g_gamma: self.g_gamma.clone(),
147            b_gamma: self.b_gamma.clone(),
148            adaptation_matrix: dst_matrix,
149        }
150    }
151}
152
153impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize>
154    TransformMatrixShaperOptimized<T, BUCKET>
155{
156    #[allow(dead_code)]
157    pub(crate) fn to_q2_13_n<
158        R: Copy + 'static + Default,
159        const PRECISION: i32,
160        const LINEAR_CAP: usize,
161    >(
162        &self,
163        gamma_lut: usize,
164        bit_depth: usize,
165    ) -> TransformMatrixShaperFixedPointOpt<R, i16, T, BUCKET>
166    where
167        f32: AsPrimitive<R>,
168    {
169        let linear_scale = if T::FINITE {
170            let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
171            ((1 << bit_depth) - 1) as f32 * lut_scale
172        } else {
173            let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
174            (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
175        };
176        let mut new_box_linear = Box::new([R::default(); BUCKET]);
177        for (dst, src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
178            *dst = (*src * linear_scale).round().as_();
179        }
180        let scale: f32 = (1i32 << PRECISION) as f32;
181        let source_matrix = self.adaptation_matrix;
182        let mut dst_matrix = Matrix3::<i16> {
183            v: [[i16::default(); 3]; 3],
184        };
185        for i in 0..3 {
186            for j in 0..3 {
187                dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
188            }
189        }
190        TransformMatrixShaperFixedPointOpt {
191            linear: new_box_linear,
192            gamma: self.gamma.clone(),
193            adaptation_matrix: dst_matrix,
194        }
195    }
196
197    #[allow(dead_code)]
198    pub(crate) fn to_q2_13_i<R: Copy + 'static + Default, const PRECISION: i32>(
199        &self,
200        gamma_lut: usize,
201        bit_depth: usize,
202    ) -> TransformMatrixShaperFpOptVec<R, i16, T>
203    where
204        f32: AsPrimitive<R>,
205    {
206        let linear_scale = if T::FINITE {
207            let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32;
208            ((1 << bit_depth) - 1) as f32 * lut_scale
209        } else {
210            let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
211            (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
212        };
213        let new_box_linear = self
214            .linear
215            .iter()
216            .map(|&x| (x * linear_scale).round().as_())
217            .collect::<Vec<R>>();
218        let scale: f32 = (1i32 << PRECISION) as f32;
219        let source_matrix = self.adaptation_matrix;
220        let mut dst_matrix = Matrix3::<i16> {
221            v: [[i16::default(); 3]; 3],
222        };
223        for i in 0..3 {
224            for j in 0..3 {
225                dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
226            }
227        }
228        TransformMatrixShaperFpOptVec {
229            linear: new_box_linear,
230            gamma: self.gamma.clone(),
231            adaptation_matrix: dst_matrix,
232        }
233    }
234
235    #[cfg(all(target_arch = "aarch64", feature = "neon_shaper_fixed_point_paths"))]
236    pub(crate) fn to_q1_30_n<R: Copy + 'static + Default, const PRECISION: i32>(
237        &self,
238        gamma_lut: usize,
239        bit_depth: usize,
240    ) -> TransformMatrixShaperFpOptVec<R, i32, T>
241    where
242        f32: AsPrimitive<R>,
243        f64: AsPrimitive<R>,
244    {
245        // It is important to scale 1 bit more to compensate vqrdmlah Q0.31, because we're going to use Q1.30
246        let table_size = if T::FINITE {
247            (1 << bit_depth) - 1
248        } else {
249            T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
250        };
251        let ext_bp = if T::FINITE {
252            bit_depth as u32 + 1
253        } else {
254            let bp = (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1).count_ones();
255            bp + 1
256        };
257        let linear_scale = {
258            let lut_scale = (gamma_lut - 1) as f64 / table_size as f64;
259            ((1u32 << ext_bp) - 1) as f64 * lut_scale
260        };
261        let new_box_linear = self
262            .linear
263            .iter()
264            .map(|&v| (v as f64 * linear_scale).round().as_())
265            .collect::<Vec<R>>();
266        let scale: f64 = (1i64 << PRECISION) as f64;
267        let source_matrix = self.adaptation_matrix;
268        let mut dst_matrix = Matrix3::<i32> {
269            v: [[i32::default(); 3]; 3],
270        };
271        for i in 0..3 {
272            for j in 0..3 {
273                dst_matrix.v[i][j] = (source_matrix.v[i][j] as f64 * scale) as i32;
274            }
275        }
276        TransformMatrixShaperFpOptVec {
277            linear: new_box_linear,
278            gamma: self.gamma.clone(),
279            adaptation_matrix: dst_matrix,
280        }
281    }
282}
283
284#[allow(unused)]
285struct TransformMatrixShaperScalar<
286    T: Clone,
287    const SRC_LAYOUT: u8,
288    const DST_LAYOUT: u8,
289    const LINEAR_CAP: usize,
290> {
291    pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
292    pub(crate) gamma_lut: usize,
293    pub(crate) bit_depth: usize,
294}
295
296#[allow(unused)]
297struct TransformMatrixShaperOptScalar<
298    T: Clone,
299    const SRC_LAYOUT: u8,
300    const DST_LAYOUT: u8,
301    const LINEAR_CAP: usize,
302> {
303    pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
304    pub(crate) gamma_lut: usize,
305    pub(crate) bit_depth: usize,
306}
307
/// Generates a factory function `$dep_name` that wraps a `$shaper` profile in
/// the executor type `$dependant`.
///
/// The generated function picks the executor's layout const parameters from
/// the runtime `(src_layout, dst_layout)` pair. Only the four RGB/RGBA
/// combinations are supported; anything else yields
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "aarch64"
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            match (src_layout, dst_layout) {
                (Layout::Rgba, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgba, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
377
/// Generates a factory function `$dep_name` for executors that take the
/// `Vec`-backed profile form.
///
/// The generated function first converts the `$shaper` profile with
/// `convert_to_v()`, then picks the `$dependant` layout const parameters from
/// the runtime `(src_layout, dst_layout)` pair. Only the four RGB/RGBA
/// combinations are supported; anything else yields
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "aarch64"
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor_to_v {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            // The conversion runs before the layout check, so it also happens
            // for layout pairs that end up rejected.
            let vec_profile = profile.convert_to_v();
            match (src_layout, dst_layout) {
                (Layout::Rgba, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile: vec_profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgba) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile: vec_profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgba, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile: vec_profile,
                    bit_depth,
                    gamma_lut,
                })),
                (Layout::Rgb, Layout::Rgb) => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile: vec_profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
444
/// Generates a factory function `$dep_name` for in-place fixed-point
/// executors.
///
/// The generated function converts the `$shaper` profile with
/// `to_q2_13_i::<$resolution, PRECISION>` and wraps the result in
/// `$dependant`, using the single `layout` for both layout const parameters.
/// Only `Layout::Rgba` and `Layout::Rgb` are supported; anything else yields
/// `CmsError::UnsupportedProfileConnection`.
#[cfg(any(
    target_arch = "x86",
    target_arch = "x86_64",
    target_arch = "aarch64"
))]
#[allow(unused)]
macro_rules! create_in_place_opt_rgb_xyz_fp_to_v {
    ($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
            const PRECISION: i32,
        >(
            layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn InPlaceTransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            // The fixed-point conversion runs before the layout check.
            let fixed_profile = profile.to_q2_13_i::<$resolution, PRECISION>(gamma_lut, bit_depth);
            match layout {
                Layout::Rgba => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    PRECISION,
                > {
                    profile: fixed_profile,
                    bit_depth,
                    gamma_lut,
                })),
                Layout::Rgb => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    PRECISION,
                > {
                    profile: fixed_profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
493
/// Generates a factory function `$dep_name` for in-place executors.
///
/// The generated function wraps the `$shaper` profile in `$dependant`, using
/// the single `layout` for both layout const parameters. Only `Layout::Rgba`
/// and `Layout::Rgb` are supported; anything else yields
/// `CmsError::UnsupportedProfileConnection`.
#[allow(unused)]
macro_rules! create_in_place_rgb_xyz {
    ($dep_name: ident, $dependant: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
            const LINEAR_CAP: usize,
        >(
            layout: Layout,
            profile: $shaper<T, LINEAR_CAP>,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result<Arc<dyn InPlaceTransformExecutor<T> + Send + Sync>, CmsError>
        where
            u32: AsPrimitive<T>,
        {
            match layout {
                Layout::Rgba => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                Layout::Rgb => Ok(Arc::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                > {
                    profile,
                    bit_depth,
                    gamma_lut,
                })),
                _ => Err(CmsError::UnsupportedProfileConnection),
            }
        }
    };
}
536
// x86 / x86_64 SIMD executor types. Each import is gated on the cargo feature
// of the path it belongs to.
#[cfg(all(
    feature = "sse_shaper_paths",
    any(target_arch = "x86", target_arch = "x86_64")
))]
use crate::conversions::sse::TransformShaperRgbSse;
#[cfg(all(
    feature = "sse_shaper_optimized_paths",
    any(target_arch = "x86", target_arch = "x86_64")
))]
use crate::conversions::sse::TransformShaperRgbOptSse;
#[cfg(all(feature = "avx_shaper_paths", target_arch = "x86_64"))]
use crate::conversions::avx::TransformShaperRgbAvx;
#[cfg(all(feature = "avx_shaper_optimized_paths", target_arch = "x86_64"))]
use crate::conversions::avx::TransformShaperRgbOptAvx;
#[cfg(all(feature = "avx512_shaper_optimized_paths", target_arch = "x86_64"))]
use crate::conversions::avx512::TransformShaperRgbOptAvx512;

// SSE4.1 factory for per-channel shapers.
#[cfg(all(
    feature = "sse_shaper_paths",
    any(target_arch = "x86", target_arch = "x86_64")
))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_sse_41,
    TransformShaperRgbSse,
    TransformMatrixShaper
);

// SSE4.1 factory for shared-curve shapers; the profile is converted to its
// `Vec`-backed form first.
#[cfg(all(
    feature = "sse_shaper_optimized_paths",
    any(target_arch = "x86", target_arch = "x86_64")
))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_sse_41_opt,
    TransformShaperRgbOptSse,
    TransformMatrixShaperOptimized
);

// AVX2 factory for per-channel shapers.
#[cfg(all(feature = "avx_shaper_paths", target_arch = "x86_64"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx2,
    TransformShaperRgbAvx,
    TransformMatrixShaper
);

// AVX2 factory for shared-curve shapers; the profile is converted to its
// `Vec`-backed form first.
#[cfg(all(feature = "avx_shaper_optimized_paths", target_arch = "x86_64"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_avx2_opt,
    TransformShaperRgbOptAvx,
    TransformMatrixShaperOptimized
);

// AVX-512 factory for shared-curve shapers; keeps the fixed-size profile.
#[cfg(all(feature = "avx512_shaper_optimized_paths", target_arch = "x86_64"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx512_opt,
    TransformShaperRgbOptAvx512,
    TransformMatrixShaperOptimized
);
597
598#[cfg(not(all(target_arch = "aarch64", feature = "neon_shaper_paths")))]
599pub(crate) fn make_rgb_xyz_rgb_transform<
600    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
601    const LINEAR_CAP: usize,
602>(
603    src_layout: Layout,
604    dst_layout: Layout,
605    profile: TransformMatrixShaper<T, LINEAR_CAP>,
606    gamma_lut: usize,
607    bit_depth: usize,
608) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
609where
610    u32: AsPrimitive<T>,
611{
612    #[cfg(all(feature = "avx_shaper_paths", target_arch = "x86_64"))]
613    if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
614        return make_rgb_xyz_rgb_transform_avx2::<T, LINEAR_CAP>(
615            src_layout, dst_layout, profile, gamma_lut, bit_depth,
616        );
617    }
618    #[cfg(all(
619        feature = "sse_shaper_paths",
620        any(target_arch = "x86", target_arch = "x86_64")
621    ))]
622    if std::arch::is_x86_feature_detected!("sse4.1") {
623        return make_rgb_xyz_rgb_transform_sse_41::<T, LINEAR_CAP>(
624            src_layout, dst_layout, profile, gamma_lut, bit_depth,
625        );
626    }
627    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
628        return Ok(Arc::new(TransformMatrixShaperScalar::<
629            T,
630            { Layout::Rgba as u8 },
631            { Layout::Rgba as u8 },
632            LINEAR_CAP,
633        > {
634            profile,
635            gamma_lut,
636            bit_depth,
637        }));
638    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
639        return Ok(Arc::new(TransformMatrixShaperScalar::<
640            T,
641            { Layout::Rgb as u8 },
642            { Layout::Rgba as u8 },
643            LINEAR_CAP,
644        > {
645            profile,
646            gamma_lut,
647            bit_depth,
648        }));
649    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
650        return Ok(Arc::new(TransformMatrixShaperScalar::<
651            T,
652            { Layout::Rgba as u8 },
653            { Layout::Rgb as u8 },
654            LINEAR_CAP,
655        > {
656            profile,
657            gamma_lut,
658            bit_depth,
659        }));
660    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
661        return Ok(Arc::new(TransformMatrixShaperScalar::<
662            T,
663            { Layout::Rgb as u8 },
664            { Layout::Rgb as u8 },
665            LINEAR_CAP,
666        > {
667            profile,
668            gamma_lut,
669            bit_depth,
670        }));
671    }
672    Err(CmsError::UnsupportedProfileConnection)
673}
674
675#[cfg(not(all(target_arch = "aarch64", feature = "neon_shaper_optimized_paths")))]
676pub(crate) fn make_rgb_xyz_rgb_transform_opt<
677    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
678    const LINEAR_CAP: usize,
679>(
680    src_layout: Layout,
681    dst_layout: Layout,
682    profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
683    gamma_lut: usize,
684    bit_depth: usize,
685) -> Result<Arc<dyn TransformExecutor<T> + Send + Sync>, CmsError>
686where
687    u32: AsPrimitive<T>,
688{
689    #[cfg(all(feature = "avx512_shaper_optimized_paths", target_arch = "x86_64"))]
690    if std::arch::is_x86_feature_detected!("avx512bw")
691        && std::arch::is_x86_feature_detected!("avx512vl")
692        && std::arch::is_x86_feature_detected!("fma")
693    {
694        return make_rgb_xyz_rgb_transform_avx512_opt::<T, LINEAR_CAP>(
695            src_layout, dst_layout, profile, gamma_lut, bit_depth,
696        );
697    }
698    #[cfg(all(feature = "avx_shaper_optimized_paths", target_arch = "x86_64"))]
699    if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
700        return make_rgb_xyz_rgb_transform_avx2_opt::<T, LINEAR_CAP>(
701            src_layout, dst_layout, profile, gamma_lut, bit_depth,
702        );
703    }
704    #[cfg(all(
705        feature = "sse_shaper_optimized_paths",
706        any(target_arch = "x86", target_arch = "x86_64")
707    ))]
708    if std::arch::is_x86_feature_detected!("sse4.1") {
709        return make_rgb_xyz_rgb_transform_sse_41_opt::<T, LINEAR_CAP>(
710            src_layout, dst_layout, profile, gamma_lut, bit_depth,
711        );
712    }
713    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
714        return Ok(Arc::new(TransformMatrixShaperOptScalar::<
715            T,
716            { Layout::Rgba as u8 },
717            { Layout::Rgba as u8 },
718            LINEAR_CAP,
719        > {
720            profile,
721            gamma_lut,
722            bit_depth,
723        }));
724    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
725        return Ok(Arc::new(TransformMatrixShaperOptScalar::<
726            T,
727            { Layout::Rgb as u8 },
728            { Layout::Rgba as u8 },
729            LINEAR_CAP,
730        > {
731            profile,
732            gamma_lut,
733            bit_depth,
734        }));
735    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
736        return Ok(Arc::new(TransformMatrixShaperOptScalar::<
737            T,
738            { Layout::Rgba as u8 },
739            { Layout::Rgb as u8 },
740            LINEAR_CAP,
741        > {
742            profile,
743            gamma_lut,
744            bit_depth,
745        }));
746    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
747        return Ok(Arc::new(TransformMatrixShaperOptScalar::<
748            T,
749            { Layout::Rgb as u8 },
750            { Layout::Rgb as u8 },
751            LINEAR_CAP,
752        > {
753            profile,
754            gamma_lut,
755            bit_depth,
756        }));
757    }
758    Err(CmsError::UnsupportedProfileConnection)
759}
760
761#[cfg(all(target_arch = "aarch64", feature = "neon_shaper_paths"))]
762use crate::conversions::neon::TransformShaperRgbNeon;
763#[cfg(all(target_arch = "aarch64", feature = "neon_shaper_optimized_paths"))]
764use crate::conversions::neon::TransformShaperRgbOptNeon;
765use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec;
766use crate::conversions::rgbxyz_fixed::{
767    TransformMatrixShaperFixedPointOpt, TransformMatrixShaperFp,
768};
769use crate::transform::PointeeSizeExpressible;
770
771#[cfg(all(target_arch = "aarch64", feature = "neon_shaper_paths"))]
772create_rgb_xyz_dependant_executor_to_v!(
773    make_rgb_xyz_rgb_transform,
774    TransformShaperRgbNeon,
775    TransformMatrixShaper
776);
777
778#[cfg(feature = "in_place")]
779create_in_place_rgb_xyz!(
780    make_in_place_rgb_xyz_transform,
781    TransformMatrixShaperScalar,
782    TransformMatrixShaper
783);
784
785#[cfg(all(target_arch = "aarch64", feature = "neon_shaper_optimized_paths"))]
786create_rgb_xyz_dependant_executor_to_v!(
787    make_rgb_xyz_rgb_transform_opt,
788    TransformShaperRgbOptNeon,
789    TransformMatrixShaperOptimized
790);
791
792#[cfg(feature = "in_place")]
793create_in_place_rgb_xyz!(
794    make_rgb_xyz_in_place_transform_opt,
795    TransformMatrixShaperOptScalar,
796    TransformMatrixShaperOptimized
797);
798
799#[cfg(all(
800    target_arch = "aarch64",
801    feature = "in_place",
802    feature = "neon_shaper_fixed_point_paths"
803))]
804use crate::conversions::neon::TransformShaperQ2_13NeonOpt;
805
806#[cfg(all(
807    target_arch = "aarch64",
808    feature = "in_place",
809    feature = "neon_shaper_fixed_point_paths"
810))]
811create_in_place_opt_rgb_xyz_fp_to_v!(
812    make_rgb_xyz_in_place_transform_q2_13_opt,
813    TransformShaperQ2_13NeonOpt,
814    i16,
815    TransformMatrixShaperOptimized
816);
817
818#[cfg(all(
819    any(target_arch = "x86_64", target_arch = "x86"),
820    feature = "in_place",
821    feature = "sse_shaper_fixed_point_paths"
822))]
823use crate::conversions::sse::TransformShaperQ2_13OptSse;
824
825#[cfg(all(
826    any(target_arch = "x86_64", target_arch = "x86"),
827    feature = "in_place",
828    feature = "sse_shaper_fixed_point_paths"
829))]
830create_in_place_opt_rgb_xyz_fp_to_v!(
831    make_sse_rgb_xyz_in_place_transform_q2_13_opt,
832    TransformShaperQ2_13OptSse,
833    i32,
834    TransformMatrixShaperOptimized
835);
836
837#[cfg(all(
838    target_arch = "x86_64",
839    feature = "in_place",
840    feature = "avx_shaper_fixed_point_paths"
841))]
842use crate::conversions::avx::TransformShaperRgbQ2_13OptAvx;
843
844#[cfg(all(
845    target_arch = "x86_64",
846    feature = "in_place",
847    feature = "avx_shaper_fixed_point_paths"
848))]
849create_in_place_opt_rgb_xyz_fp_to_v!(
850    make_avx_rgb_xyz_in_place_transform_q2_13_opt,
851    TransformShaperRgbQ2_13OptAvx,
852    i32,
853    TransformMatrixShaperOptimized
854);
855
856#[allow(unused)]
857impl<
858    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
859    const SRC_LAYOUT: u8,
860    const DST_LAYOUT: u8,
861    const LINEAR_CAP: usize,
862> TransformExecutor<T> for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
863where
864    u32: AsPrimitive<T>,
865{
866    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
867        use crate::mlaf::mlaf;
868        let src_cn = Layout::from(SRC_LAYOUT);
869        let dst_cn = Layout::from(DST_LAYOUT);
870        let src_channels = src_cn.channels();
871        let dst_channels = dst_cn.channels();
872
873        if src.len() / src_channels != dst.len() / dst_channels {
874            return Err(CmsError::LaneSizeMismatch);
875        }
876        if src.len() % src_channels != 0 {
877            return Err(CmsError::LaneMultipleOfChannels);
878        }
879        if dst.len() % dst_channels != 0 {
880            return Err(CmsError::LaneMultipleOfChannels);
881        }
882
883        let transform = self.profile.adaptation_matrix;
884        let scale = (self.gamma_lut - 1) as f32;
885        let max_colors: T = ((1 << self.bit_depth) - 1).as_();
886
887        for (src, dst) in src
888            .chunks_exact(src_channels)
889            .zip(dst.chunks_exact_mut(dst_channels))
890        {
891            let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
892            let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
893            let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
894            let a = if src_channels == 4 {
895                src[src_cn.a_i()]
896            } else {
897                max_colors
898            };
899
900            let new_r = mlaf(
901                0.5f32,
902                mlaf(
903                    mlaf(r * transform.v[0][0], g, transform.v[0][1]),
904                    b,
905                    transform.v[0][2],
906                )
907                .max(0f32)
908                .min(1f32),
909                scale,
910            );
911
912            let new_g = mlaf(
913                0.5f32,
914                mlaf(
915                    mlaf(r * transform.v[1][0], g, transform.v[1][1]),
916                    b,
917                    transform.v[1][2],
918                )
919                .max(0f32)
920                .min(1f32),
921                scale,
922            );
923
924            let new_b = mlaf(
925                0.5f32,
926                mlaf(
927                    mlaf(r * transform.v[2][0], g, transform.v[2][1]),
928                    b,
929                    transform.v[2][2],
930                )
931                .max(0f32)
932                .min(1f32),
933                scale,
934            );
935
936            dst[dst_cn.r_i()] = self.profile.r_gamma[(new_r as u16) as usize];
937            dst[dst_cn.g_i()] = self.profile.g_gamma[(new_g as u16) as usize];
938            dst[dst_cn.b_i()] = self.profile.b_gamma[(new_b as u16) as usize];
939            if dst_channels == 4 {
940                dst[dst_cn.a_i()] = a;
941            }
942        }
943
944        Ok(())
945    }
946}
947
/// In-place variant of the per-channel-table matrix shaper: the buffer is
/// both the input and the output. Only built with the `in_place` feature.
#[cfg(feature = "in_place")]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> InPlaceTransformExecutor<T> for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
where
    u32: AsPrimitive<T>,
{
    /// Converts every pixel of `dst`, overwriting it with the result.
    ///
    /// Colour samples go through the `r_linear`/`g_linear`/`b_linear` tables,
    /// the adaptation matrix, a `[0, 1]` clamp and finally the
    /// `r_gamma`/`g_gamma`/`b_gamma` tables. With a four-channel layout the
    /// alpha sample is read before the colour writes and stored back
    /// unchanged.
    ///
    /// # Errors
    ///
    /// [`CmsError::LaneMultipleOfChannels`] when `dst.len()` is not a multiple
    /// of the layout's channel count.
    ///
    /// # Panics
    ///
    /// Panics when `SRC_LAYOUT` and `DST_LAYOUT` differ.
    fn transform(&self, dst: &mut [T]) -> Result<(), CmsError> {
        use crate::mlaf::mlaf;

        assert_eq!(
            SRC_LAYOUT, DST_LAYOUT,
            "This is in-place transform, layout must not diverge"
        );

        let layout = Layout::from(SRC_LAYOUT);
        let stride = layout.channels();
        if dst.len() % stride != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let matrix = self.profile.adaptation_matrix;
        let lut_scale = (self.gamma_lut - 1) as f32;
        let opaque: T = ((1 << self.bit_depth) - 1).as_();

        for pixel in dst.chunks_exact_mut(stride) {
            let lin_r = self.profile.r_linear[pixel[layout.r_i()]._as_usize()];
            let lin_g = self.profile.g_linear[pixel[layout.g_i()]._as_usize()];
            let lin_b = self.profile.b_linear[pixel[layout.b_i()]._as_usize()];
            let alpha = match stride {
                4 => pixel[layout.a_i()],
                _ => opaque,
            };

            // One matrix row applied to the linear triple, clamped and mapped
            // to a gamma-table position.
            // NOTE(review): `mlaf(acc, a, b)` lives in `crate::mlaf`, outside
            // this view; presumably `acc + a * b`, which makes the `0.5` a
            // round-to-nearest bias for the truncating cast below — confirm.
            let shape = |row: usize| {
                let mixed = mlaf(
                    mlaf(lin_r * matrix.v[row][0], lin_g, matrix.v[row][1]),
                    lin_b,
                    matrix.v[row][2],
                );
                mlaf(0.5f32, mixed.max(0f32).min(1f32), lut_scale)
            };

            let idx_r = (shape(0) as u16) as usize;
            let idx_g = (shape(1) as u16) as usize;
            let idx_b = (shape(2) as u16) as usize;

            pixel[layout.r_i()] = self.profile.r_gamma[idx_r];
            pixel[layout.g_i()] = self.profile.g_gamma[idx_g];
            pixel[layout.b_i()] = self.profile.b_gamma[idx_b];
            if stride == 4 {
                pixel[layout.a_i()] = alpha;
            }
        }

        Ok(())
    }
}
1032
1033#[allow(unused)]
1034impl<
1035    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
1036    const SRC_LAYOUT: u8,
1037    const DST_LAYOUT: u8,
1038    const LINEAR_CAP: usize,
1039> TransformExecutor<T> for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
1040where
1041    u32: AsPrimitive<T>,
1042{
1043    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
1044        use crate::mlaf::mlaf;
1045        let src_cn = Layout::from(SRC_LAYOUT);
1046        let dst_cn = Layout::from(DST_LAYOUT);
1047        let src_channels = src_cn.channels();
1048        let dst_channels = dst_cn.channels();
1049
1050        if src.len() / src_channels != dst.len() / dst_channels {
1051            return Err(CmsError::LaneSizeMismatch);
1052        }
1053        if src.len() % src_channels != 0 {
1054            return Err(CmsError::LaneMultipleOfChannels);
1055        }
1056        if dst.len() % dst_channels != 0 {
1057            return Err(CmsError::LaneMultipleOfChannels);
1058        }
1059
1060        let transform = self.profile.adaptation_matrix;
1061        let scale = (self.gamma_lut - 1) as f32;
1062        let max_colors: T = ((1 << self.bit_depth) - 1).as_();
1063
1064        for (src, dst) in src
1065            .chunks_exact(src_channels)
1066            .zip(dst.chunks_exact_mut(dst_channels))
1067        {
1068            let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
1069            let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
1070            let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
1071            let a = if src_channels == 4 {
1072                src[src_cn.a_i()]
1073            } else {
1074                max_colors
1075            };
1076
1077            let new_r = mlaf(
1078                0.5f32,
1079                mlaf(
1080                    mlaf(r * transform.v[0][0], g, transform.v[0][1]),
1081                    b,
1082                    transform.v[0][2],
1083                )
1084                .max(0f32)
1085                .min(1f32),
1086                scale,
1087            );
1088
1089            let new_g = mlaf(
1090                0.5f32,
1091                mlaf(
1092                    mlaf(r * transform.v[1][0], g, transform.v[1][1]),
1093                    b,
1094                    transform.v[1][2],
1095                )
1096                .max(0f32)
1097                .min(1f32),
1098                scale,
1099            );
1100
1101            let new_b = mlaf(
1102                0.5f32,
1103                mlaf(
1104                    mlaf(r * transform.v[2][0], g, transform.v[2][1]),
1105                    b,
1106                    transform.v[2][2],
1107                )
1108                .max(0f32)
1109                .min(1f32),
1110                scale,
1111            );
1112
1113            dst[dst_cn.r_i()] = self.profile.gamma[(new_r as u16) as usize];
1114            dst[dst_cn.g_i()] = self.profile.gamma[(new_g as u16) as usize];
1115            dst[dst_cn.b_i()] = self.profile.gamma[(new_b as u16) as usize];
1116            if dst_channels == 4 {
1117                dst[dst_cn.a_i()] = a;
1118            }
1119        }
1120
1121        Ok(())
1122    }
1123}
1124
/// In-place variant of the shared-table matrix shaper: the buffer is both the
/// input and the output. Only built with the `in_place` feature.
#[cfg(feature = "in_place")]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> InPlaceTransformExecutor<T>
    for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP>
where
    u32: AsPrimitive<T>,
{
    /// Converts every pixel of `dst`, overwriting it with the result.
    ///
    /// Colour samples go through the shared `linear` table, the adaptation
    /// matrix, a `[0, 1]` clamp and finally the shared `gamma` table. With a
    /// four-channel layout the alpha sample is read before the colour writes
    /// and stored back unchanged.
    ///
    /// # Errors
    ///
    /// [`CmsError::LaneMultipleOfChannels`] when `dst.len()` is not a multiple
    /// of the layout's channel count.
    ///
    /// # Panics
    ///
    /// Panics when `SRC_LAYOUT` and `DST_LAYOUT` differ.
    fn transform(&self, dst: &mut [T]) -> Result<(), CmsError> {
        use crate::mlaf::mlaf;

        assert_eq!(
            SRC_LAYOUT, DST_LAYOUT,
            "This is in-place transform, layout must not diverge"
        );

        let layout = Layout::from(DST_LAYOUT);
        let stride = layout.channels();
        if dst.len() % stride != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let matrix = self.profile.adaptation_matrix;
        let lut_scale = (self.gamma_lut - 1) as f32;
        let opaque: T = ((1 << self.bit_depth) - 1).as_();

        for pixel in dst.chunks_exact_mut(stride) {
            let lin_r = self.profile.linear[pixel[layout.r_i()]._as_usize()];
            let lin_g = self.profile.linear[pixel[layout.g_i()]._as_usize()];
            let lin_b = self.profile.linear[pixel[layout.b_i()]._as_usize()];
            let alpha = match stride {
                4 => pixel[layout.a_i()],
                _ => opaque,
            };

            // One matrix row applied to the linear triple, clamped and mapped
            // to a gamma-table position.
            // NOTE(review): `mlaf(acc, a, b)` lives in `crate::mlaf`, outside
            // this view; presumably `acc + a * b`, which makes the `0.5` a
            // round-to-nearest bias for the truncating cast below — confirm.
            let shape = |row: usize| {
                let mixed = mlaf(
                    mlaf(lin_r * matrix.v[row][0], lin_g, matrix.v[row][1]),
                    lin_b,
                    matrix.v[row][2],
                );
                mlaf(0.5f32, mixed.max(0f32).min(1f32), lut_scale)
            };

            let idx_r = (shape(0) as u16) as usize;
            let idx_g = (shape(1) as u16) as usize;
            let idx_b = (shape(2) as u16) as usize;

            pixel[layout.r_i()] = self.profile.gamma[idx_r];
            pixel[layout.g_i()] = self.profile.gamma[idx_g];
            pixel[layout.b_i()] = self.profile.gamma[idx_b];
            if stride == 4 {
                pixel[layout.a_i()] = alpha;
            }
        }

        Ok(())
    }
}