1use crate::{PremultipliedColorU8, SpreadMode, PixmapRef};
19
20use crate::geom::ScreenIntRect;
21use crate::pixmap::SubPixmapMut;
22use crate::wide::{f32x8, i32x8, u32x8};
23
/// Number of pixels processed by one full-width pipeline invocation
/// (matches the lane count of `f32x8`).
pub const STAGE_WIDTH: usize = 8;
25
/// Signature shared by every pipeline stage: it receives the mutable
/// pipeline state and is responsible for calling the next stage itself.
pub type StageFn = fn(p: &mut Pipeline);
27
/// Mutable state threaded through every stage of a pipeline run.
pub struct Pipeline<'a, 'b: 'a> {
    // Position inside `functions` of the stage that `next_stage` will run next.
    index: usize,
    // Stage list currently being executed (full-width or tail variant).
    functions: &'a [StageFn],
    // Pixmap read by the sampling stages (`gather`, `bilinear`, `bicubic`).
    pixmap_src: PixmapRef<'a>,
    // Pixmap read and written by the load/store stages.
    pixmap_dst: &'a mut SubPixmapMut<'b>,
    // Per-stage parameters (uniform color, transform, gradient data, ...).
    ctx: &'a mut super::Context,
    // Mask data consumed by `mask_u8`.
    mask_ctx: super::MaskCtx<'a>,
    // Coverage data consumed by `scale_u8` and `lerp_u8`.
    aa_mask_ctx: super::AAMaskCtx,
    // Source channels, one lane per pixel. Shader stages also use `r`/`g`
    // to carry x/y coordinates (see `seed_shader`).
    r: f32x8,
    g: f32x8,
    b: f32x8,
    a: f32x8,
    // Destination channels, one lane per pixel.
    dr: f32x8,
    dg: f32x8,
    db: f32x8,
    da: f32x8,
    // Number of pixels covered by the current invocation
    // (`STAGE_WIDTH` for full runs, fewer for the row remainder).
    tail: usize,
    // Coordinates of the first pixel of the current invocation.
    dx: usize,
    dy: usize,
}
48
49impl Pipeline<'_, '_> {
50 #[inline(always)]
51 fn next_stage(&mut self) {
52 let next: fn(&mut Self) = self.functions[self.index];
53 self.index += 1;
54 next(self);
55 }
56}
57
/// Table of all full-width stage functions; its length is fixed to
/// `super::STAGES_COUNT`.
///
/// NOTE(review): presumably indexed by the stage identifiers declared in the
/// parent module, so the order of the entries must not change — confirm.
pub const STAGES: &[StageFn; super::STAGES_COUNT] = &[
    move_source_to_destination,
    move_destination_to_source,
    clamp_0,
    clamp_a,
    premultiply,
    uniform_color,
    seed_shader,
    load_dst,
    store,
    load_dst_u8,
    store_u8,
    gather,
    load_mask_u8,
    mask_u8,
    scale_u8,
    lerp_u8,
    scale_1_float,
    lerp_1_float,
    destination_atop,
    destination_in,
    destination_out,
    destination_over,
    source_atop,
    source_in,
    source_out,
    source_over,
    clear,
    modulate,
    multiply,
    plus,
    screen,
    xor,
    color_burn,
    color_dodge,
    darken,
    difference,
    exclusion,
    hard_light,
    lighten,
    overlay,
    soft_light,
    hue,
    saturation,
    color,
    luminosity,
    source_over_rgba,
    transform,
    reflect,
    repeat,
    bilinear,
    bicubic,
    pad_x1,
    reflect_x1,
    repeat_x1,
    gradient,
    evenly_spaced_2_stop_gradient,
    xy_to_unit_angle,
    xy_to_radius,
    xy_to_2pt_conical_focal_on_circle,
    xy_to_2pt_conical_well_behaved,
    xy_to_2pt_conical_smaller,
    xy_to_2pt_conical_greater,
    xy_to_2pt_conical_strip,
    mask_2pt_conical_nan,
    mask_2pt_conical_degenerates,
    apply_vector_mask,
    alter_2pt_conical_compensate_focal,
    alter_2pt_conical_unswap,
    negate_x,
    apply_concentric_scale_bias,
    gamma_expand_2,
    gamma_expand_dst_2,
    gamma_compress_2,
    gamma_expand_22,
    gamma_expand_dst_22,
    gamma_compress_22,
    gamma_expand_srgb,
    gamma_expand_dst_srgb,
    gamma_compress_srgb,
];
140
141pub fn fn_ptr(f: StageFn) -> *const () {
142 f as *const ()
143}
144
145#[inline(never)]
146pub fn start(
147 functions: &[StageFn],
148 functions_tail: &[StageFn],
149 rect: &ScreenIntRect,
150 aa_mask_ctx: super::AAMaskCtx,
151 mask_ctx: super::MaskCtx,
152 ctx: &mut super::Context,
153 pixmap_src: PixmapRef,
154 pixmap_dst: &mut SubPixmapMut,
155) {
156 let mut p = Pipeline {
157 index: 0,
158 functions: &[],
159 pixmap_src,
160 pixmap_dst,
161 mask_ctx,
162 aa_mask_ctx,
163 ctx,
164 r: f32x8::default(),
165 g: f32x8::default(),
166 b: f32x8::default(),
167 a: f32x8::default(),
168 dr: f32x8::default(),
169 dg: f32x8::default(),
170 db: f32x8::default(),
171 da: f32x8::default(),
172 tail: 0,
173 dx: 0,
174 dy: 0,
175 };
176
177 for y in rect.y()..rect.bottom() {
178 let mut x = rect.x() as usize;
179 let end = rect.right() as usize;
180
181 p.functions = functions;
182 while x + STAGE_WIDTH <= end {
183 p.index = 0;
184 p.dx = x;
185 p.dy = y as usize;
186 p.tail = STAGE_WIDTH;
187 p.next_stage();
188 x += STAGE_WIDTH;
189 }
190
191 if x != end {
192 p.index = 0;
193 p.functions = functions_tail;
194 p.dx = x;
195 p.dy = y as usize;
196 p.tail = end - x;
197 p.next_stage();
198 }
199 }
200}
201
202fn move_source_to_destination(p: &mut Pipeline) {
203 p.dr = p.r;
204 p.dg = p.g;
205 p.db = p.b;
206 p.da = p.a;
207
208 p.next_stage();
209}
210
211fn premultiply(p: &mut Pipeline) {
212 p.r *= p.a;
213 p.g *= p.a;
214 p.b *= p.a;
215
216 p.next_stage();
217}
218
219fn move_destination_to_source(p: &mut Pipeline) {
220 p.r = p.dr;
221 p.g = p.dg;
222 p.b = p.db;
223 p.a = p.da;
224
225 p.next_stage();
226}
227
228fn clamp_0(p: &mut Pipeline) {
229 p.r = p.r.max(f32x8::default());
230 p.g = p.g.max(f32x8::default());
231 p.b = p.b.max(f32x8::default());
232 p.a = p.a.max(f32x8::default());
233
234 p.next_stage();
235}
236
237fn clamp_a(p: &mut Pipeline) {
238 p.r = p.r.min(f32x8::splat(1.0));
239 p.g = p.g.min(f32x8::splat(1.0));
240 p.b = p.b.min(f32x8::splat(1.0));
241 p.a = p.a.min(f32x8::splat(1.0));
242
243 p.next_stage();
244}
245
246fn uniform_color(p: &mut Pipeline) {
247 let ctx = &p.ctx.uniform_color;
248 p.r = f32x8::splat(ctx.r);
249 p.g = f32x8::splat(ctx.g);
250 p.b = f32x8::splat(ctx.b);
251 p.a = f32x8::splat(ctx.a);
252
253 p.next_stage();
254}
255
256fn seed_shader(p: &mut Pipeline) {
257 let iota = f32x8::from([0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]);
258
259 p.r = f32x8::splat(p.dx as f32) + iota;
260 p.g = f32x8::splat(p.dy as f32 + 0.5);
261 p.b = f32x8::splat(1.0);
262 p.a = f32x8::default();
263
264 p.dr = f32x8::default();
265 p.dg = f32x8::default();
266 p.db = f32x8::default();
267 p.da = f32x8::default();
268
269 p.next_stage();
270}
271
272pub fn load_dst(p: &mut Pipeline) {
273 load_8888(p.pixmap_dst.slice4_at_xy(p.dx, p.dy), &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
274 p.next_stage();
275}
276
277pub fn load_dst_tail(p: &mut Pipeline) {
278 load_8888_tail(p.tail, p.pixmap_dst.slice_at_xy(p.dx, p.dy), &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
279 p.next_stage();
280}
281
282pub fn store(p: &mut Pipeline) {
283 store_8888(&p.r, &p.g, &p.b, &p.a, p.pixmap_dst.slice4_at_xy(p.dx, p.dy));
284 p.next_stage();
285}
286
287pub fn store_tail(p: &mut Pipeline) {
288 store_8888_tail(&p.r, &p.g, &p.b, &p.a, p.tail, p.pixmap_dst.slice_at_xy(p.dx, p.dy));
289 p.next_stage();
290}
291
/// Empty stage: performs no work and does not invoke the next stage.
///
/// NOTE(review): presumably never scheduled for this pipeline and present
/// only to fill its slot in `STAGES` — confirm.
pub fn load_dst_u8(_: &mut Pipeline) {
}
296
/// Empty stage: performs no work and does not invoke the next stage.
///
/// NOTE(review): presumably never scheduled for this pipeline — confirm.
pub fn load_dst_u8_tail(_: &mut Pipeline) {
}
300
/// Empty stage: performs no work and does not invoke the next stage.
///
/// NOTE(review): presumably never scheduled for this pipeline and present
/// only to fill its slot in `STAGES` — confirm.
pub fn store_u8(_: &mut Pipeline) {
}
304
/// Empty stage: performs no work and does not invoke the next stage.
///
/// NOTE(review): presumably never scheduled for this pipeline — confirm.
pub fn store_u8_tail(_: &mut Pipeline) {
}
308
309pub fn gather(p: &mut Pipeline) {
310 let ix = gather_ix(p.pixmap_src, p.r, p.g);
311 load_8888(&p.pixmap_src.gather(ix), &mut p.r, &mut p.g, &mut p.b, &mut p.a);
312
313 p.next_stage();
314}
315
316#[inline(always)]
317fn gather_ix(pixmap: PixmapRef, mut x: f32x8, mut y: f32x8) -> u32x8 {
318 let w = ulp_sub(pixmap.width() as f32);
320 let h = ulp_sub(pixmap.height() as f32);
321 x = x.max(f32x8::default()).min(f32x8::splat(w));
322 y = y.max(f32x8::default()).min(f32x8::splat(h));
323
324 (y.trunc_int() * i32x8::splat(pixmap.width() as i32) + x.trunc_int()).to_u32x8_bitcast()
325}
326
/// Returns the float whose bit pattern is exactly one less than `v`'s.
///
/// For a positive, finite, non-zero `v` this is the largest `f32` strictly
/// below `v`, which turns an exclusive upper bound into an inclusive one.
/// Uses the standard `to_bits`/`from_bits` pair for the reinterpretation
/// instead of a generic byte cast.
#[inline(always)]
fn ulp_sub(v: f32) -> f32 {
    f32::from_bits(v.to_bits() - 1)
}
332
/// Empty stage: performs no work and does not invoke the next stage.
///
/// NOTE(review): presumably never scheduled for this pipeline and present
/// only to fill its slot in `STAGES` — confirm.
fn load_mask_u8(_: &mut Pipeline) {
}
336
337fn mask_u8(p: &mut Pipeline) {
338 let offset = p.mask_ctx.offset(p.dx, p.dy);
339 let mut c = [0.0; 8];
340 for i in 0..p.tail {
341 c[i] = p.mask_ctx.data[offset + i] as f32;
342 }
343 let c = f32x8::from(c) / f32x8::splat(255.0);
344
345 if c == f32x8::default() {
346 return;
347 }
348
349 p.r *= c;
350 p.g *= c;
351 p.b *= c;
352 p.a *= c;
353
354 p.next_stage();
355}
356
357fn scale_u8(p: &mut Pipeline) {
358 let data = p.aa_mask_ctx.copy_at_xy(p.dx, p.dy, p.tail);
360 let c = f32x8::from([data[0] as f32, data[1] as f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
361 let c = c / f32x8::splat(255.0);
362
363 p.r *= c;
364 p.g *= c;
365 p.b *= c;
366 p.a *= c;
367
368 p.next_stage();
369}
370
371fn lerp_u8(p: &mut Pipeline) {
372 let data = p.aa_mask_ctx.copy_at_xy(p.dx, p.dy, p.tail);
374 let c = f32x8::from([data[0] as f32, data[1] as f32, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
375 let c = c / f32x8::splat(255.0);
376
377 p.r = lerp(p.dr, p.r, c);
378 p.g = lerp(p.dg, p.g, c);
379 p.b = lerp(p.db, p.b, c);
380 p.a = lerp(p.da, p.a, c);
381
382 p.next_stage();
383}
384
385fn scale_1_float(p: &mut Pipeline) {
386 let c = f32x8::splat(p.ctx.current_coverage);
387 p.r *= c;
388 p.g *= c;
389 p.b *= c;
390 p.a *= c;
391
392 p.next_stage();
393}
394
395fn lerp_1_float(p: &mut Pipeline) {
396 let c = f32x8::splat(p.ctx.current_coverage);
397 p.r = lerp(p.dr, p.r, c);
398 p.g = lerp(p.dg, p.g, c);
399 p.b = lerp(p.db, p.b, c);
400 p.a = lerp(p.da, p.a, c);
401
402 p.next_stage();
403}
404
/// Defines a blend stage `$name` that applies `$f(src, dst, src_alpha,
/// dst_alpha)` to every channel, alpha included. All four results are computed
/// from the incoming values before anything is written back.
macro_rules! blend_fn {
    ($name:ident, $f:expr) => {
        fn $name(p: &mut Pipeline) {
            let (sa, da) = (p.a, p.da);

            let r = $f(p.r, p.dr, sa, da);
            let g = $f(p.g, p.dg, sa, da);
            let b = $f(p.b, p.db, sa, da);
            let a = $f(sa, da, sa, da);

            p.r = r;
            p.g = g;
            p.b = b;
            p.a = a;

            p.next_stage();
        }
    };
}
417
418blend_fn!(clear, |_, _, _, _| f32x8::default());
419blend_fn!(source_atop, |s, d, sa, da| s * da + d * inv(sa));
420blend_fn!(destination_atop, |s, d, sa, da| d * sa + s * inv(da));
421blend_fn!(source_in, |s, _, _, da| s * da);
422blend_fn!(destination_in, |_, d, sa, _| d * sa);
423blend_fn!(source_out, |s, _, _, da| s * inv(da));
424blend_fn!(destination_out, |_, d, sa, _| d * inv(sa));
425blend_fn!(source_over, |s, d, sa, _| mad(d, inv(sa), s));
426blend_fn!(destination_over, |s, d, _, da| mad(s, inv(da), d));
427blend_fn!(modulate, |s, d, _, _| s * d);
428blend_fn!(multiply, |s, d, sa, da| s * inv(da) + d * inv(sa) + s * d);
429blend_fn!(screen, |s, d, _, _| s + d - s * d);
430blend_fn!(xor, |s, d, sa, da| s * inv(da) + d * inv(sa));
431
432blend_fn!(plus, |s: f32x8, d: f32x8, _, _| (s + d).min(f32x8::splat(1.0)));
434
/// Defines a blend stage `$name` that applies `$f(src, dst, src_alpha,
/// dst_alpha)` to the color channels only; the resulting alpha is always
/// `da * (1 - a) + a`.
macro_rules! blend_fn2 {
    ($name:ident, $f:expr) => {
        fn $name(p: &mut Pipeline) {
            let (sa, da) = (p.a, p.da);

            let r = $f(p.r, p.dr, sa, da);
            let g = $f(p.g, p.dg, sa, da);
            let b = $f(p.b, p.db, sa, da);
            let a = mad(da, inv(sa), sa);

            p.r = r;
            p.g = g;
            p.b = b;
            p.a = a;

            p.next_stage();
        }
    };
}
448
449blend_fn2!(darken, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).max(d * sa));
450blend_fn2!(lighten, |s: f32x8, d, sa, da: f32x8| s + d - (s * da).min(d * sa));
451blend_fn2!(difference, |s: f32x8, d, sa, da: f32x8| s + d - two((s * da).min(d * sa)));
452blend_fn2!(exclusion, |s: f32x8, d, _, _| s + d - two(s * d));
453
454blend_fn2!(color_burn, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8|
455 d.cmp_eq(da).blend(
456 d + s * inv(da),
457 s.cmp_eq(f32x8::default()).blend(
458 d * inv(sa),
459 sa * (da - da.min((da - d) * sa * s.recip_fast())) + s * inv(da) + d * inv(sa)
460 )
461 )
462);
463
464blend_fn2!(color_dodge, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8|
465 d.cmp_eq(f32x8::default()).blend(
466 s * inv(da),
467 s.cmp_eq(sa).blend(
468 s + d * inv(sa),
469 sa * da.min((d * sa) * (sa - s).recip_fast()) + s * inv(da) + d * inv(sa)
470 )
471 )
472);
473
474blend_fn2!(hard_light, |s: f32x8, d: f32x8, sa, da|
475 s * inv(da) + d * inv(sa) + two(s).cmp_le(sa).blend(
476 two(s * d),
477 sa * da - two((da - d) * (sa - s))
478 )
479);
480
481blend_fn2!(overlay, |s: f32x8, d: f32x8, sa, da|
482 s * inv(da) + d * inv(sa) + two(d).cmp_le(da).blend(
483 two(s * d),
484 sa * da - two((da - d) * (sa - s))
485 )
486);
487
488blend_fn2!(soft_light, |s: f32x8, d: f32x8, sa: f32x8, da: f32x8| {
489 let m = da.cmp_gt(f32x8::default()).blend(d / da, f32x8::default());
490 let s2 = two(s);
491 let m4 = two(two(m));
492
493 let dark_src = d * (sa + (s2 - sa) * (f32x8::splat(1.0) - m));
498 let dark_dst = (m4 * m4 + m4) * (m - f32x8::splat(1.0)) + f32x8::splat(7.0) * m;
499 let lite_dst = m.sqrt() - m;
500 let lite_src = d * sa + da * (s2 - sa)
501 * two(two(d)).cmp_le(da).blend(dark_dst, lite_dst); s * inv(da) + d * inv(sa) + s2.cmp_le(sa).blend(dark_src, lite_src) });
505
/// Defines a blend stage `$name` for modes that need all channels at once:
/// `$f` receives the four source and four destination channels and returns the
/// new `(r, g, b, a)`.
macro_rules! blend_fn3 {
    ($name:ident, $f:expr) => {
        fn $name(p: &mut Pipeline) {
            let blended = $f(p.r, p.g, p.b, p.a, p.dr, p.dg, p.db, p.da);
            p.r = blended.0;
            p.g = blended.1;
            p.b = blended.2;
            p.a = blended.3;

            p.next_stage();
        }
    };
}
527
528blend_fn3!(hue, hue_k);
529
530#[inline(always)]
531fn hue_k(
532 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
533 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
534) -> (f32x8, f32x8, f32x8, f32x8) {
535 let rr = &mut (r * a);
536 let gg = &mut (g * a);
537 let bb = &mut (b * a);
538
539 set_sat(rr, gg, bb, sat(dr, dg, db) * a);
540 set_lum(rr, gg, bb, lum(dr, dg, db) * a);
541 clip_color(rr, gg, bb, a * da);
542
543 let r = r * inv(da) + dr * inv(a) + *rr;
544 let g = g * inv(da) + dg * inv(a) + *gg;
545 let b = b * inv(da) + db * inv(a) + *bb;
546 let a = a + da - a * da;
547
548 (r, g, b, a)
549}
550
551blend_fn3!(saturation, saturation_k);
552
553#[inline(always)]
554fn saturation_k(
555 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
556 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
557) -> (f32x8, f32x8, f32x8, f32x8) {
558 let rr = &mut (dr * a);
559 let gg = &mut (dg * a);
560 let bb = &mut (db * a);
561
562 set_sat(rr, gg, bb, sat(r, g, b) * da);
563 set_lum(rr, gg, bb, lum(dr, dg, db) * a); clip_color(rr, gg, bb, a * da);
565
566 let r = r * inv(da) + dr * inv(a) + *rr;
567 let g = g * inv(da) + dg * inv(a) + *gg;
568 let b = b * inv(da) + db * inv(a) + *bb;
569 let a = a + da - a * da;
570
571 (r, g, b, a)
572}
573
574blend_fn3!(color, color_k);
575
576#[inline(always)]
577fn color_k(
578 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
579 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
580) -> (f32x8, f32x8, f32x8, f32x8) {
581 let rr = &mut (r * da);
582 let gg = &mut (g * da);
583 let bb = &mut (b * da);
584
585 set_lum(rr, gg, bb, lum(dr, dg, db) * a);
586 clip_color(rr, gg, bb, a * da);
587
588 let r = r * inv(da) + dr * inv(a) + *rr;
589 let g = g * inv(da) + dg * inv(a) + *gg;
590 let b = b * inv(da) + db * inv(a) + *bb;
591 let a = a + da - a * da;
592
593 (r, g, b, a)
594}
595
596blend_fn3!(luminosity, luminosity_k);
597
598#[inline(always)]
599fn luminosity_k(
600 r: f32x8, g: f32x8, b: f32x8, a: f32x8,
601 dr: f32x8, dg: f32x8, db: f32x8, da: f32x8,
602) -> (f32x8, f32x8, f32x8, f32x8) {
603 let rr = &mut (dr * a);
604 let gg = &mut (dg * a);
605 let bb = &mut (db * a);
606
607 set_lum(rr, gg, bb, lum(r, g, b) * da);
608 clip_color(rr, gg, bb, a * da);
609
610 let r = r * inv(da) + dr * inv(a) + *rr;
611 let g = g * inv(da) + dg * inv(a) + *gg;
612 let b = b * inv(da) + db * inv(a) + *bb;
613 let a = a + da - a * da;
614
615 (r, g, b, a)
616}
617
618#[inline(always)]
619fn sat(r: f32x8, g: f32x8, b: f32x8) -> f32x8 {
620 r.max(g.max(b)) - r.min(g.min(b))
621}
622
623#[inline(always)]
624fn lum(r: f32x8, g: f32x8, b: f32x8) -> f32x8 {
625 r * f32x8::splat(0.30) + g * f32x8::splat(0.59) + b * f32x8::splat(0.11)
626}
627
628#[inline(always)]
629fn set_sat(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, s: f32x8) {
630 let mn = r.min(g.min(*b));
631 let mx = r.max(g.max(*b));
632 let sat = mx - mn;
633
634 let scale = |c| sat.cmp_eq(f32x8::default())
636 .blend(f32x8::default(), (c - mn) * s / sat);
637
638 *r = scale(*r);
639 *g = scale(*g);
640 *b = scale(*b);
641}
642
643#[inline(always)]
644fn set_lum(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, l: f32x8) {
645 let diff = l - lum(*r, *g, *b);
646 *r += diff;
647 *g += diff;
648 *b += diff;
649}
650
651#[inline(always)]
652fn clip_color(r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: f32x8) {
653 let mn = r.min(g.min(*b));
654 let mx = r.max(g.max(*b));
655 let l = lum(*r, *g, *b);
656
657 let clip = |mut c| {
658 c = mx.cmp_ge(f32x8::default()).blend(c, l + (c - l) * l / (l - mn));
659 c = mx.cmp_gt(a).blend(l + (c - l) * (a - l) / (mx - l), c);
660 c = c.max(f32x8::default()); c
662 };
663
664 *r = clip(*r);
665 *g = clip(*g);
666 *b = clip(*b);
667}
668
669pub fn source_over_rgba(p: &mut Pipeline) {
670 let pixels = p.pixmap_dst.slice4_at_xy(p.dx, p.dy);
671 load_8888(pixels, &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
672 p.r = mad(p.dr, inv(p.a), p.r);
673 p.g = mad(p.dg, inv(p.a), p.g);
674 p.b = mad(p.db, inv(p.a), p.b);
675 p.a = mad(p.da, inv(p.a), p.a);
676 store_8888(&p.r, &p.g, &p.b, &p.a, pixels);
677
678 p.next_stage();
679}
680
681pub fn source_over_rgba_tail(p: &mut Pipeline) {
682 let pixels = p.pixmap_dst.slice_at_xy(p.dx, p.dy);
683 load_8888_tail(p.tail, pixels, &mut p.dr, &mut p.dg, &mut p.db, &mut p.da);
684 p.r = mad(p.dr, inv(p.a), p.r);
685 p.g = mad(p.dg, inv(p.a), p.g);
686 p.b = mad(p.db, inv(p.a), p.b);
687 p.a = mad(p.da, inv(p.a), p.a);
688 store_8888_tail(&p.r, &p.g, &p.b, &p.a, p.tail, pixels);
689
690 p.next_stage();
691}
692
693fn transform(p: &mut Pipeline) {
694 let ts = &p.ctx.transform;
695
696 let tr = mad(p.r, f32x8::splat(ts.sx), mad(p.g, f32x8::splat(ts.kx), f32x8::splat(ts.tx)));
697 let tg = mad(p.r, f32x8::splat(ts.ky), mad(p.g, f32x8::splat(ts.sy), f32x8::splat(ts.ty)));
698 p.r = tr;
699 p.g = tg;
700
701 p.next_stage();
702}
703
704fn reflect(p: &mut Pipeline) {
709 let ctx = &p.ctx.limit_x;
710 p.r = exclusive_reflect(p.r, ctx.scale, ctx.inv_scale);
711
712 let ctx = &p.ctx.limit_y;
713 p.g = exclusive_reflect(p.g, ctx.scale, ctx.inv_scale);
714
715 p.next_stage();
716}
717
718#[inline(always)]
719fn exclusive_reflect(v: f32x8, limit: f32, inv_limit: f32) -> f32x8 {
720 let limit = f32x8::splat(limit);
721 let inv_limit = f32x8::splat(inv_limit);
722 ((v - limit) - (limit + limit)
723 * ((v - limit) * (inv_limit * f32x8::splat(0.5))).floor() - limit).abs()
724}
725
726fn repeat(p: &mut Pipeline) {
727 let ctx = &p.ctx.limit_x;
728 p.r = exclusive_repeat(p.r, ctx.scale, ctx.inv_scale);
729
730 let ctx = &p.ctx.limit_y;
731 p.g = exclusive_repeat(p.g, ctx.scale, ctx.inv_scale);
732
733 p.next_stage();
734}
735
736#[inline(always)]
737fn exclusive_repeat(v: f32x8, limit: f32, inv_limit: f32) -> f32x8 {
738 v - (v * f32x8::splat(inv_limit)).floor() * f32x8::splat(limit)
739}
740
741fn bilinear(p: &mut Pipeline) {
742 let x = p.r;
743 let fx = (x + f32x8::splat(0.5)).fract();
744 let y = p.g;
745 let fy = (y + f32x8::splat(0.5)).fract();
746 let one = f32x8::splat(1.0);
747 let wx = [one - fx, fx];
748 let wy = [one - fy, fy];
749
750 sampler_2x2(p.pixmap_src, &p.ctx.sampler, x, y, &wx, &wy, &mut p.r, &mut p.g, &mut p.b, &mut p.a);
751
752 p.next_stage();
753}
754
755fn bicubic(p: &mut Pipeline) {
756 let x = p.r;
757 let fx = (x + f32x8::splat(0.5)).fract();
758 let y = p.g;
759 let fy = (y + f32x8::splat(0.5)).fract();
760 let one = f32x8::splat(1.0);
761 let wx = [bicubic_far(one - fx), bicubic_near(one - fx), bicubic_near(fx), bicubic_far(fx)];
762 let wy = [bicubic_far(one - fy), bicubic_near(one - fy), bicubic_near(fy), bicubic_far(fy)];
763
764 sampler_4x4(p.pixmap_src, &p.ctx.sampler, x, y, &wx, &wy, &mut p.r, &mut p.g, &mut p.b, &mut p.a);
765
766 p.next_stage();
767}
768
769#[inline(always)]
775fn bicubic_near(t: f32x8) -> f32x8 {
776 mad(
778 t,
779 mad(t,
780 mad(
781 f32x8::splat(-21.0/18.0),
782 t,
783 f32x8::splat(27.0/18.0),
784 ),
785 f32x8::splat(9.0/18.0),
786 ),
787 f32x8::splat(1.0/18.0),
788 )
789}
790
791#[inline(always)]
792fn bicubic_far(t: f32x8) -> f32x8 {
793 (t * t) * mad(f32x8::splat(7.0/18.0), t, f32x8::splat(-6.0/18.0))
795}
796
797#[inline(always)]
798fn sampler_2x2(
799 pixmap: PixmapRef,
800 ctx: &super::SamplerCtx,
801 cx: f32x8, cy: f32x8,
802 wx: &[f32x8; 2], wy: &[f32x8; 2],
803 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
804) {
805 *r = f32x8::default();
806 *g = f32x8::default();
807 *b = f32x8::default();
808 *a = f32x8::default();
809
810 let one = f32x8::splat(1.0);
811 let start = -0.5;
812 let mut y = cy + f32x8::splat(start);
813 for j in 0..2 {
814 let mut x = cx + f32x8::splat(start);
815 for i in 0..2 {
816 let mut rr = f32x8::default();
817 let mut gg = f32x8::default();
818 let mut bb = f32x8::default();
819 let mut aa = f32x8::default();
820 sample(pixmap, ctx, x,y, &mut rr, &mut gg, &mut bb, &mut aa);
821
822 let w = wx[i] * wy[j];
823 *r = mad(w, rr, *r);
824 *g = mad(w, gg, *g);
825 *b = mad(w, bb, *b);
826 *a = mad(w, aa, *a);
827
828 x += one;
829 }
830
831 y += one;
832 }
833}
834
835#[inline(always)]
836fn sampler_4x4(
837 pixmap: PixmapRef,
838 ctx: &super::SamplerCtx,
839 cx: f32x8, cy: f32x8,
840 wx: &[f32x8; 4], wy: &[f32x8; 4],
841 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
842) {
843 *r = f32x8::default();
844 *g = f32x8::default();
845 *b = f32x8::default();
846 *a = f32x8::default();
847
848 let one = f32x8::splat(1.0);
849 let start = -1.5;
850 let mut y = cy + f32x8::splat(start);
851 for j in 0..4 {
852 let mut x = cx + f32x8::splat(start);
853 for i in 0..4 {
854 let mut rr = f32x8::default();
855 let mut gg = f32x8::default();
856 let mut bb = f32x8::default();
857 let mut aa = f32x8::default();
858 sample(pixmap, ctx, x,y, &mut rr, &mut gg, &mut bb, &mut aa);
859
860 let w = wx[i] * wy[j];
861 *r = mad(w, rr, *r);
862 *g = mad(w, gg, *g);
863 *b = mad(w, bb, *b);
864 *a = mad(w, aa, *a);
865
866 x += one;
867 }
868
869 y += one;
870 }
871}
872
873#[inline(always)]
874fn sample(
875 pixmap: PixmapRef, ctx: &super::SamplerCtx, mut x: f32x8, mut y: f32x8,
876 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
877) {
878 x = tile(x, ctx.spread_mode, pixmap.width() as f32, ctx.inv_width);
879 y = tile(y, ctx.spread_mode, pixmap.height() as f32, ctx.inv_height);
880
881 let ix = gather_ix(pixmap, x, y);
882 load_8888(&pixmap.gather(ix), r, g, b, a);
883}
884
885#[inline(always)]
886fn tile(v: f32x8, mode: SpreadMode, limit: f32, inv_limit: f32) -> f32x8 {
887 match mode {
888 SpreadMode::Pad => v,
889 SpreadMode::Repeat => exclusive_repeat(v, limit, inv_limit),
890 SpreadMode::Reflect => exclusive_reflect(v, limit, inv_limit),
891 }
892}
893
894fn pad_x1(p: &mut Pipeline) {
895 p.r = p.r.normalize();
896
897 p.next_stage();
898}
899
900fn reflect_x1(p: &mut Pipeline) {
901 p.r = (
902 (p.r - f32x8::splat(1.0))
903 - two(((p.r - f32x8::splat(1.0)) * f32x8::splat(0.5)).floor())
904 - f32x8::splat(1.0)
905 ).abs().normalize();
906
907 p.next_stage();
908}
909
910fn repeat_x1(p: &mut Pipeline) {
911 p.r = (p.r - p.r.floor()).normalize();
912
913 p.next_stage();
914}
915
916fn gradient(p: &mut Pipeline) {
917 let ctx = &p.ctx.gradient;
918
919 let t: [f32; 8] = p.r.into();
921 let mut idx = u32x8::default();
922 for i in 1..ctx.len {
923 let tt = ctx.t_values[i].get();
924 let n: u32x8 = bytemuck::cast([
925 (t[0] >= tt) as u32,
926 (t[1] >= tt) as u32,
927 (t[2] >= tt) as u32,
928 (t[3] >= tt) as u32,
929 (t[4] >= tt) as u32,
930 (t[5] >= tt) as u32,
931 (t[6] >= tt) as u32,
932 (t[7] >= tt) as u32,
933 ]);
934 idx = idx + n;
935 }
936 gradient_lookup(ctx, &idx, p.r, &mut p.r, &mut p.g, &mut p.b, &mut p.a);
937
938 p.next_stage();
939}
940
941fn gradient_lookup(
942 ctx: &super::GradientCtx, idx: &u32x8, t: f32x8,
943 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
944) {
945 let idx: [u32; 8] = bytemuck::cast(*idx);
946
947 macro_rules! gather {
948 ($d:expr, $c:ident) => {
949 f32x8::from([
952 $d[idx[0] as usize].$c,
953 $d[idx[1] as usize].$c,
954 $d[idx[2] as usize].$c,
955 $d[idx[3] as usize].$c,
956 $d[idx[4] as usize].$c,
957 $d[idx[5] as usize].$c,
958 $d[idx[6] as usize].$c,
959 $d[idx[7] as usize].$c,
960 ])
961 };
962 }
963
964 let fr = gather!(&ctx.factors, r);
965 let fg = gather!(&ctx.factors, g);
966 let fb = gather!(&ctx.factors, b);
967 let fa = gather!(&ctx.factors, a);
968
969 let br = gather!(&ctx.biases, r);
970 let bg = gather!(&ctx.biases, g);
971 let bb = gather!(&ctx.biases, b);
972 let ba = gather!(&ctx.biases, a);
973
974 *r = mad(t, fr, br);
975 *g = mad(t, fg, bg);
976 *b = mad(t, fb, bb);
977 *a = mad(t, fa, ba);
978}
979
980fn evenly_spaced_2_stop_gradient(p: &mut Pipeline) {
981 let ctx = &p.ctx.evenly_spaced_2_stop_gradient;
982
983 let t = p.r;
984 p.r = mad(t, f32x8::splat(ctx.factor.r), f32x8::splat(ctx.bias.r));
985 p.g = mad(t, f32x8::splat(ctx.factor.g), f32x8::splat(ctx.bias.g));
986 p.b = mad(t, f32x8::splat(ctx.factor.b), f32x8::splat(ctx.bias.b));
987 p.a = mad(t, f32x8::splat(ctx.factor.a), f32x8::splat(ctx.bias.a));
988
989 p.next_stage();
990}
991
992fn xy_to_unit_angle(p: &mut Pipeline) {
993 let x = p.r;
994 let y = p.g;
995 let x_abs = x.abs();
996 let y_abs = y.abs();
997 let slope = x_abs.min(y_abs) / x_abs.max(y_abs);
998 let s = slope * slope;
999 let phi = slope
1004 * (f32x8::splat(0.15912117063999176025390625)
1005 + s * (f32x8::splat(-5.185396969318389892578125e-2)
1006 + s * (f32x8::splat(2.476101927459239959716796875e-2)
1007 + s * (f32x8::splat(-7.0547382347285747528076171875e-3)))));
1008 let phi = x_abs.cmp_lt(y_abs).blend(f32x8::splat(0.25) - phi, phi);
1009 let phi = x
1010 .cmp_lt(f32x8::splat(0.0))
1011 .blend(f32x8::splat(0.5) - phi, phi);
1012 let phi = y
1013 .cmp_lt(f32x8::splat(0.0))
1014 .blend(f32x8::splat(1.0) - phi, phi);
1015 let phi = phi.cmp_ne(phi).blend(f32x8::splat(0.0), phi);
1016 p.r = phi;
1017 p.next_stage();
1018}
1019
1020fn xy_to_radius(p: &mut Pipeline) {
1021 let x2 = p.r * p.r;
1022 let y2 = p.g * p.g;
1023 p.r = (x2 + y2).sqrt();
1024
1025 p.next_stage();
1026}
1027
1028fn xy_to_2pt_conical_focal_on_circle(p: &mut Pipeline) {
1029 let x = p.r;
1030 let y = p.g;
1031 p.r = x + y * y / x;
1032
1033 p.next_stage();
1034}
1035
1036fn xy_to_2pt_conical_well_behaved(p: &mut Pipeline) {
1037 let ctx = &p.ctx.two_point_conical_gradient;
1038
1039 let x = p.r;
1040 let y = p.g;
1041 p.r = (x * x + y * y).sqrt() - x * f32x8::splat(ctx.p0);
1042
1043 p.next_stage();
1044}
1045
1046fn xy_to_2pt_conical_greater(p: &mut Pipeline) {
1047 let ctx = &p.ctx.two_point_conical_gradient;
1048
1049 let x = p.r;
1050 let y = p.g;
1051 p.r = (x * x - y * y).sqrt() - x * f32x8::splat(ctx.p0);
1052
1053 p.next_stage();
1054}
1055
1056fn xy_to_2pt_conical_smaller(p: &mut Pipeline) {
1057 let ctx = &p.ctx.two_point_conical_gradient;
1058
1059 let x = p.r;
1060 let y = p.g;
1061 p.r = -(x * x - y * y).sqrt() - x * f32x8::splat(ctx.p0);
1062
1063 p.next_stage();
1064}
1065
1066fn xy_to_2pt_conical_strip(p: &mut Pipeline) {
1067 let ctx = &p.ctx.two_point_conical_gradient;
1068
1069 let x = p.r;
1070 let y = p.g;
1071 p.r = x + (f32x8::splat(ctx.p0) - y * y).sqrt();
1072
1073 p.next_stage();
1074}
1075
1076fn mask_2pt_conical_nan(p: &mut Pipeline) {
1077 let ctx = &mut p.ctx.two_point_conical_gradient;
1078
1079 let t = p.r;
1080 let is_degenerate = t.cmp_ne(t);
1081 p.r = is_degenerate.blend(f32x8::default(), t);
1082 ctx.mask = cond_to_mask(!is_degenerate.to_u32x8_bitcast());
1083
1084 p.next_stage();
1085}
1086
1087fn mask_2pt_conical_degenerates(p: &mut Pipeline) {
1088 let ctx = &mut p.ctx.two_point_conical_gradient;
1089
1090 let t = p.r;
1091 let is_degenerate = t.cmp_le(f32x8::default()) | t.cmp_ne(t);
1092 p.r = is_degenerate.blend(f32x8::default(), t);
1093 ctx.mask = cond_to_mask(!is_degenerate.to_u32x8_bitcast());
1094
1095 p.next_stage();
1096}
1097
1098fn apply_vector_mask(p: &mut Pipeline) {
1099 let ctx = &p.ctx.two_point_conical_gradient;
1100
1101 p.r = (p.r.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1102 p.g = (p.g.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1103 p.b = (p.b.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1104 p.a = (p.a.to_u32x8_bitcast() & ctx.mask).to_f32x8_bitcast();
1105
1106 p.next_stage();
1107}
1108
1109fn alter_2pt_conical_compensate_focal(p: &mut Pipeline) {
1110 let ctx = &p.ctx.two_point_conical_gradient;
1111
1112 p.r = p.r + f32x8::splat(ctx.p1);
1113
1114 p.next_stage();
1115}
1116
1117fn alter_2pt_conical_unswap(p: &mut Pipeline) {
1118 p.r = f32x8::splat(1.0) - p.r;
1119
1120 p.next_stage();
1121}
1122
1123fn negate_x(p: &mut Pipeline) {
1124 p.r = -p.r;
1125
1126 p.next_stage();
1127}
1128
1129fn apply_concentric_scale_bias(p: &mut Pipeline) {
1130 let ctx = &p.ctx.two_point_conical_gradient;
1131
1132 let x = p.r;
1134 p.r = x * f32x8::splat(ctx.p0) + f32x8::splat(ctx.p1);
1135
1136 p.next_stage();
1137}
1138
1139fn gamma_expand_2(p: &mut Pipeline) {
1140 p.r = p.r * p.r;
1141 p.g = p.g * p.g;
1142 p.b = p.b * p.b;
1143
1144 p.next_stage();
1145}
1146
1147fn gamma_expand_dst_2(p: &mut Pipeline) {
1148 p.dr = p.dr * p.dr;
1149 p.dg = p.dg * p.dg;
1150 p.db = p.db * p.db;
1151
1152 p.next_stage();
1153}
1154
1155fn gamma_compress_2(p: &mut Pipeline) {
1156 p.r = p.r.sqrt();
1157 p.g = p.g.sqrt();
1158 p.b = p.b.sqrt();
1159
1160 p.next_stage();
1161}
1162
1163fn gamma_expand_22(p: &mut Pipeline) {
1164 p.r = p.r.powf(2.2);
1165 p.g = p.g.powf(2.2);
1166 p.b = p.b.powf(2.2);
1167
1168 p.next_stage();
1169}
1170
1171fn gamma_expand_dst_22(p: &mut Pipeline) {
1172 p.dr = p.dr.powf(2.2);
1173 p.dg = p.dg.powf(2.2);
1174 p.db = p.db.powf(2.2);
1175
1176 p.next_stage();
1177}
1178
1179fn gamma_compress_22(p: &mut Pipeline) {
1180 p.r = p.r.powf(0.45454545);
1181 p.g = p.g.powf(0.45454545);
1182 p.b = p.b.powf(0.45454545);
1183
1184 p.next_stage();
1185}
1186
1187fn srgb_expand(x: f32x8) -> f32x8 {
1188 let small = x.cmp_le(f32x8::splat(0.04045));
1189 let linear = x / f32x8::splat(12.92);
1190 let exp = ((x + f32x8::splat(0.055)) / f32x8::splat(1.055)).powf(2.4);
1191 small.blend(linear, exp)
1192}
1193
1194fn srgb_compress(x: f32x8) -> f32x8 {
1195 let small = x.cmp_le(f32x8::splat(0.0031308));
1196 let linear = x * f32x8::splat(12.92);
1197 let exp = x.powf(0.416666666) * f32x8::splat(1.055) - f32x8::splat(0.055);
1198 small.blend(linear, exp)
1199}
1200
1201fn gamma_expand_srgb(p: &mut Pipeline) {
1202 p.r = srgb_expand(p.r);
1203 p.g = srgb_expand(p.g);
1204 p.b = srgb_expand(p.b);
1205
1206 p.next_stage();
1207}
1208
1209fn gamma_expand_dst_srgb(p: &mut Pipeline) {
1210 p.dr = srgb_expand(p.dr);
1211 p.dg = srgb_expand(p.dg);
1212 p.db = srgb_expand(p.db);
1213
1214 p.next_stage();
1215}
1216
1217fn gamma_compress_srgb(p: &mut Pipeline) {
1218 p.r = srgb_compress(p.r);
1219 p.g = srgb_compress(p.g);
1220 p.b = srgb_compress(p.b);
1221
1222 p.next_stage();
1223}
1224
/// Terminal stage: does nothing and, unlike the other stages, does not call
/// `next_stage`, so the chain of stage calls ends here.
pub fn just_return(_: &mut Pipeline) {
}
1228
1229#[inline(always)]
1230fn cond_to_mask(cond: u32x8) -> u32x8 {
1231 let cond: [u32; 8] = bytemuck::cast(cond);
1232 bytemuck::cast([
1233 if cond[0] != 0 { !0 } else { 0 },
1234 if cond[1] != 0 { !0 } else { 0 },
1235 if cond[2] != 0 { !0 } else { 0 },
1236 if cond[3] != 0 { !0 } else { 0 },
1237 if cond[4] != 0 { !0 } else { 0 },
1238 if cond[5] != 0 { !0 } else { 0 },
1239 if cond[6] != 0 { !0 } else { 0 },
1240 if cond[7] != 0 { !0 } else { 0 },
1241 ])
1242}
1243
1244#[inline(always)]
1245fn load_8888(
1246 data: &[PremultipliedColorU8; STAGE_WIDTH],
1247 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
1248) {
1249 const FACTOR: f32 = 1.0 / 255.0;
1252
1253 *r = f32x8::from([
1254 data[0].red() as f32 * FACTOR, data[1].red() as f32 * FACTOR,
1255 data[2].red() as f32 * FACTOR, data[3].red() as f32 * FACTOR,
1256 data[4].red() as f32 * FACTOR, data[5].red() as f32 * FACTOR,
1257 data[6].red() as f32 * FACTOR, data[7].red() as f32 * FACTOR,
1258 ]);
1259
1260 *g = f32x8::from([
1261 data[0].green() as f32 * FACTOR, data[1].green() as f32 * FACTOR,
1262 data[2].green() as f32 * FACTOR, data[3].green() as f32 * FACTOR,
1263 data[4].green() as f32 * FACTOR, data[5].green() as f32 * FACTOR,
1264 data[6].green() as f32 * FACTOR, data[7].green() as f32 * FACTOR,
1265 ]);
1266
1267 *b = f32x8::from([
1268 data[0].blue() as f32 * FACTOR, data[1].blue() as f32 * FACTOR,
1269 data[2].blue() as f32 * FACTOR, data[3].blue() as f32 * FACTOR,
1270 data[4].blue() as f32 * FACTOR, data[5].blue() as f32 * FACTOR,
1271 data[6].blue() as f32 * FACTOR, data[7].blue() as f32 * FACTOR,
1272 ]);
1273
1274 *a = f32x8::from([
1275 data[0].alpha() as f32 * FACTOR, data[1].alpha() as f32 * FACTOR,
1276 data[2].alpha() as f32 * FACTOR, data[3].alpha() as f32 * FACTOR,
1277 data[4].alpha() as f32 * FACTOR, data[5].alpha() as f32 * FACTOR,
1278 data[6].alpha() as f32 * FACTOR, data[7].alpha() as f32 * FACTOR,
1279 ]);
1280}
1281
1282#[inline(always)]
1283fn load_8888_tail(
1284 tail: usize, data: &[PremultipliedColorU8],
1285 r: &mut f32x8, g: &mut f32x8, b: &mut f32x8, a: &mut f32x8,
1286) {
1287 let mut tmp = [PremultipliedColorU8::TRANSPARENT; STAGE_WIDTH];
1290 tmp[0..tail].copy_from_slice(&data[0..tail]);
1291 load_8888(&tmp, r, g, b, a);
1292}
1293
1294#[inline(always)]
1295fn store_8888(
1296 r: &f32x8, g: &f32x8, b: &f32x8, a: &f32x8,
1297 data: &mut [PremultipliedColorU8; STAGE_WIDTH],
1298) {
1299 let r: [i32; 8] = unnorm(r).into();
1300 let g: [i32; 8] = unnorm(g).into();
1301 let b: [i32; 8] = unnorm(b).into();
1302 let a: [i32; 8] = unnorm(a).into();
1303
1304 let conv = |rr, gg, bb, aa|
1305 PremultipliedColorU8::from_rgba_unchecked(rr as u8, gg as u8, bb as u8, aa as u8);
1306
1307 data[0] = conv(r[0], g[0], b[0], a[0]);
1308 data[1] = conv(r[1], g[1], b[1], a[1]);
1309 data[2] = conv(r[2], g[2], b[2], a[2]);
1310 data[3] = conv(r[3], g[3], b[3], a[3]);
1311 data[4] = conv(r[4], g[4], b[4], a[4]);
1312 data[5] = conv(r[5], g[5], b[5], a[5]);
1313 data[6] = conv(r[6], g[6], b[6], a[6]);
1314 data[7] = conv(r[7], g[7], b[7], a[7]);
1315}
1316
1317#[inline(always)]
1318fn store_8888_tail(
1319 r: &f32x8, g: &f32x8, b: &f32x8, a: &f32x8,
1320 tail: usize, data: &mut [PremultipliedColorU8],
1321) {
1322 let r: [i32; 8] = unnorm(r).into();
1323 let g: [i32; 8] = unnorm(g).into();
1324 let b: [i32; 8] = unnorm(b).into();
1325 let a: [i32; 8] = unnorm(a).into();
1326
1327 for i in 0..STAGE_WIDTH {
1331 data[i] = PremultipliedColorU8::from_rgba_unchecked(
1332 r[i] as u8, g[i] as u8, b[i] as u8, a[i] as u8,
1333 );
1334
1335 if i + 1 == tail {
1336 break;
1337 }
1338 }
1339}
1340
1341#[inline(always)]
1342fn unnorm(v: &f32x8) -> i32x8 {
1343 (v.max(f32x8::default()).min(f32x8::splat(1.0)) * f32x8::splat(255.0)).round_int()
1344}
1345
1346#[inline(always)]
1347fn inv(v: f32x8) -> f32x8 {
1348 f32x8::splat(1.0) - v
1349}
1350
1351#[inline(always)]
1352fn two(v: f32x8) -> f32x8 {
1353 v + v
1354}
1355
1356#[inline(always)]
1357fn mad(f: f32x8, m: f32x8, a: f32x8) -> f32x8 {
1358 f * m + a
1359}
1360
1361#[inline(always)]
1362fn lerp(from: f32x8, to: f32x8, t: f32x8) -> f32x8 {
1363 mad(to - from, t, from)
1364}