1use crate::flatten::TOL_2;
9#[cfg(not(feature = "std"))]
10use crate::kurbo::common::FloatFuncs as _;
11use crate::kurbo::{CubicBez, ParamCurve, PathEl, Point, QuadBez};
12use alloc::vec::Vec;
13use bytemuck::{Pod, Zeroable};
14use fearless_simd::*;
15
/// A path element produced by flattening: either the start of a new subpath
/// or a straight line segment to the next point.
pub(crate) enum LinePathEl {
    /// Begin a new subpath at the given point.
    MoveTo(Point),
    /// Draw a straight line from the previous point to the given point.
    LineTo(Point),
}
31
/// Sink for flattened path elements; `flatten` invokes this once per output
/// element, in path order.
pub(crate) trait Callback {
    /// Receive the next flattened element.
    fn callback(&mut self, el: LinePathEl);
}
38
39#[inline(always)]
44pub(crate) fn flatten<S: Simd>(
45 simd: S,
46 path: impl IntoIterator<Item = PathEl>,
47 tolerance: f64,
48 callback: &mut impl Callback,
49 flatten_ctx: &mut FlattenCtx,
50) {
51 flatten_ctx.flattened_cubics.clear();
52
53 let sqrt_tol = tolerance.sqrt();
54 let mut closed = true;
55 let mut start_pt = Point::ZERO;
56 let mut last_pt = Point::ZERO;
57
58 for el in path {
59 match el {
60 PathEl::MoveTo(p) => {
61 if !closed && last_pt != start_pt {
62 callback.callback(LinePathEl::LineTo(start_pt));
63 }
64 closed = false;
65 last_pt = p;
66 start_pt = p;
67 callback.callback(LinePathEl::MoveTo(p));
68 }
69 PathEl::LineTo(p) => {
70 debug_assert!(!closed, "Expected a `MoveTo` before a `LineTo`");
71 last_pt = p;
72 callback.callback(LinePathEl::LineTo(p));
73 }
74 PathEl::QuadTo(p1, p2) => {
75 debug_assert!(!closed, "Expected a `MoveTo` before a `QuadTo`");
76 let p0 = last_pt;
77 if f64::max((p1 - p0).hypot2(), (p1 - p2).hypot2()) <= 4. * TOL_2 {
96 callback.callback(LinePathEl::LineTo(p2));
97 } else {
98 let q = QuadBez::new(p0, p1, p2);
99 let params = q.estimate_subdiv(sqrt_tol);
100 let n = ((0.5 * params.val / sqrt_tol).ceil() as usize).max(1);
101 let step = 1.0 / (n as f64);
102 for i in 1..n {
103 let u = (i as f64) * step;
104 let t = q.determine_subdiv_t(¶ms, u);
105 let p = q.eval(t);
106 callback.callback(LinePathEl::LineTo(p));
107 }
108 callback.callback(LinePathEl::LineTo(p2));
109 }
110 last_pt = p2;
111 }
112 PathEl::CurveTo(p1, p2, p3) => {
113 debug_assert!(!closed, "Expected a `MoveTo` before a `CurveTo`");
114 let p0 = last_pt;
115 if f64::max((p0 - p1).hypot2(), (p3 - p2).hypot2()) <= 16. / 9. * TOL_2 {
136 callback.callback(LinePathEl::LineTo(p3));
137 } else {
138 let c = CubicBez::new(p0, p1, p2, p3);
139 let max = flatten_cubic_simd(simd, c, flatten_ctx, tolerance as f32);
140
141 for p in &flatten_ctx.flattened_cubics[1..max] {
142 callback.callback(LinePathEl::LineTo(Point::new(p.x as f64, p.y as f64)));
143 }
144 }
145 last_pt = p3;
146 }
147 PathEl::ClosePath => {
148 closed = true;
149 if last_pt != start_pt {
150 callback.callback(LinePathEl::LineTo(start_pt));
151 }
152 }
153 }
154 }
155
156 if !closed && last_pt != start_pt {
157 callback.callback(LinePathEl::LineTo(start_pt));
158 }
159}
160
/// Approximation of the parabola arc-length integral, used to parametrize
/// quadratic flattening so segments equalize error.
fn approx_parabola_integral(x: f64) -> f64 {
    const D: f64 = 0.67;
    let quarter_sq = 0.25 * x * x;
    let fourth_root = (D.powi(4) + quarter_sq).sqrt().sqrt();
    let denom = (1.0 - D) + fourth_root;
    x / denom
}
170
/// Approximate inverse of `approx_parabola_integral`, mapping arc-length
/// fractions back to curve parameters.
fn approx_parabola_inv_integral(x: f64) -> f64 {
    const B: f64 = 0.39;
    let root = (B * B + 0.25 * x * x).sqrt();
    x * ((1.0 - B) + root)
}
176
impl FlattenParamsExt for QuadBez {
    /// Estimate the subdivision parameters for flattening this quadratic
    /// within `sqrt_tol`² of the true curve (approximate parabola
    /// arc-length method).
    #[inline(always)]
    fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams {
        // Map the quad onto the standard parabola: x0/x2 are the parabola
        // coordinates of the two endpoints.
        let d01 = self.p1 - self.p0;
        let d12 = self.p2 - self.p1;
        // Second difference: proportional to the quad's (constant) second
        // derivative.
        let dd = d01 - d12;
        let cross = (self.p2 - self.p0).cross(dd);
        let x0 = d01.dot(dd) * cross.recip();
        let x2 = d12.dot(dd) * cross.recip();
        // Scale from parabola space back to curve space.
        let scale = (cross / (dd.hypot() * (x2 - x0))).abs();

        // Arc-length-like parameters at the endpoints.
        let a0 = approx_parabola_integral(x0);
        let a2 = approx_parabola_integral(x2);
        let val = if scale.is_finite() {
            let da = (a2 - a0).abs();
            let sqrt_scale = scale.sqrt();
            if x0.signum() == x2.signum() {
                da * sqrt_scale
            } else {
                // Endpoints straddle the point of maximum curvature
                // (near-cusp); bound the cost via the minimum x.
                let xmin = sqrt_tol / sqrt_scale;
                sqrt_tol * da / approx_parabola_integral(xmin)
            }
        } else {
            // Degenerate (collinear) control points: no extra subdivision.
            0.0
        };
        let u0 = approx_parabola_inv_integral(a0);
        let u2 = approx_parabola_inv_integral(a2);
        let uscale = (u2 - u0).recip();
        FlattenParams {
            a0,
            a2,
            u0,
            uscale,
            val,
        }
    }

    /// Map a uniform subdivision fraction `x` in [0, 1] to the curve
    /// parameter `t` that equalizes flattening error across segments.
    #[inline(always)]
    fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64 {
        let a = params.a0 + (params.a2 - params.a0) * x;
        let u = approx_parabola_inv_integral(a);
        (u - params.u0) * params.uscale
    }
}
224
/// Extension trait adding quadratic-flattening helpers to `QuadBez`.
trait FlattenParamsExt {
    /// Compute subdivision parameters for the given tolerance square root.
    fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams;
    /// Map a uniform fraction `x` to a curve parameter `t` using `params`.
    fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64;
}
229
/// A 2-D point with `f32` coordinates. `Pod`/`Zeroable` + `#[repr(C)]` let
/// arrays of points be reinterpreted as flat `f32` (or `f64`) slices for
/// SIMD loads and stores.
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
#[repr(C)]
struct Point32 {
    x: f32,
    y: f32,
}
242
/// State computed once per quadratic by `estimate_subdiv` and reused by
/// `determine_subdiv_t` for every subdivision point.
struct FlattenParams {
    // Approximate parabola integral at the curve start.
    a0: f64,
    // Approximate parabola integral at the curve end.
    a2: f64,
    // Inverse integral evaluated at `a0`.
    u0: f64,
    // Reciprocal of the inverse-integral span (u2 - u0).
    uscale: f64,
    // Subdivision "cost"; the segment count is derived from it by the caller.
    val: f64,
}
251
/// Upper bound on the number of quadratics a single cubic is split into.
const MAX_QUADS: usize = 16;
256
/// Reusable scratch buffers for the SIMD cubic flattener, avoiding per-curve
/// allocation (only `flattened_cubics` grows, and it is reused).
#[derive(Default, Debug)]
pub struct FlattenCtx {
    // Curve samples at even step indices (quad endpoints). The `+ 4` slack
    // allows full 8-float SIMD reads/writes to run past the live region.
    even_pts: [Point32; MAX_QUADS + 4],
    // Samples at odd step indices (quad midpoints); overwritten with the
    // derived quad control points by `estimate_subdiv_simd`.
    odd_pts: [Point32; MAX_QUADS],
    // Per-quad flattening parameters in struct-of-arrays layout, filled by
    // `estimate_subdiv_simd` (see `FlattenParams` for field meanings).
    a0: [f32; MAX_QUADS],
    da: [f32; MAX_QUADS],
    u0: [f32; MAX_QUADS],
    uscale: [f32; MAX_QUADS],
    // Per-quad subdivision "cost"; the sum fixes the output segment count.
    val: [f32; MAX_QUADS],
    // Number of quads the current cubic was split into.
    n_quads: usize,
    // Output points of the most recently flattened cubic.
    flattened_cubics: Vec<Point32>,
}
272
#[inline(always)]
/// Lane-wise `f32::is_finite`: `abs` clears the sign bit, then the bit
/// pattern is compared against 0x7f80_0000 — the exponent-all-ones encoding
/// shared by infinities and NaNs. Strictly below means finite.
fn is_finite_simd<S: Simd>(x: f32x4<S>) -> mask32x4<S> {
    let simd = x.simd;

    let x_abs = x.abs();
    let reinterpreted = u32x4::from_bytes(x_abs.to_bytes());
    simd.simd_lt_u32x4(reinterpreted, u32x4::splat(simd, 0x7f80_0000))
}
281
#[inline(always)]
/// 8-lane `f32` port of the scalar `approx_parabola_integral`:
/// `x / (1 - D + (D^4 + 0.25 x^2)^(1/4))` with `D = 0.67`.
fn approx_parabola_integral_simd<S: Simd>(x: f32x8<S>) -> f32x8<S> {
    let simd = x.simd;

    const D: f32 = 0.67;
    // Precomputed D.powi(4).
    const D_POWI_4: f32 = 0.201_511_2;

    let temp1 = f32x8::splat(simd, 0.25).madd(x * x, f32x8::splat(simd, D_POWI_4));
    let temp2 = temp1.sqrt();
    // Two square roots yield the fourth root.
    let temp3 = temp2.sqrt();
    let temp4 = f32x8::splat(simd, 1.0) - f32x8::splat(simd, D);
    let temp5 = temp4 + temp3;
    x / temp5
}
296
#[inline(always)]
/// 4-lane variant of `approx_parabola_integral_simd`, used on the per-quad
/// (one value per quad) cusp path in `estimate_subdiv_simd`.
fn approx_parabola_integral_simd_x4<S: Simd>(x: f32x4<S>) -> f32x4<S> {
    let simd = x.simd;

    const D: f32 = 0.67;
    // Precomputed D.powi(4).
    const D_POWI_4: f32 = 0.201_511_2;

    let temp1 = f32x4::splat(simd, 0.25).madd(x * x, f32x4::splat(simd, D_POWI_4));
    let temp2 = temp1.sqrt();
    // Two square roots yield the fourth root.
    let temp3 = temp2.sqrt();
    let temp4 = f32x4::splat(simd, 1.0) - f32x4::splat(simd, D);
    let temp5 = temp4 + temp3;
    x / temp5
}
311
#[inline(always)]
/// 8-lane `f32` port of the scalar `approx_parabola_inv_integral`:
/// `x * (1 - B + (B^2 + 0.25 x^2)^(1/2))` with `B = 0.39`.
fn approx_parabola_inv_integral_simd<S: Simd>(x: f32x8<S>) -> f32x8<S> {
    let simd = x.simd;

    const B: f32 = 0.39;
    const ONE_MINUS_B: f32 = 1.0 - B;

    let temp1 = f32x8::splat(simd, B * B);
    let temp2 = f32x8::splat(simd, 0.25).madd(x * x, temp1);
    let temp3 = temp2.sqrt();
    let temp4 = f32x8::splat(simd, ONE_MINUS_B) + temp3;

    x * temp4
}
326
#[inline(always)]
/// Broadcast one `Point32` to all four (x, y) lane pairs of an `f32x8` by
/// viewing the point's 8 bytes as a single `f64` and splatting that.
fn pt_splat_simd<S: Simd>(simd: S, pt: Point32) -> f32x8<S> {
    let p_f64: f64 = bytemuck::cast(pt);
    simd.reinterpret_f32_f64x4(f64x4::splat(simd, p_f64))
}
332
#[inline(always)]
/// Evaluate a cubic Bézier lane-wise. Control points and `t` are packed as
/// interleaved (x, y) pairs, so one call evaluates 4 points at (up to) 4
/// distinct parameters.
fn eval_simd<S: Simd>(
    p0: f32x8<S>,
    p1: f32x8<S>,
    p2: f32x8<S>,
    p3: f32x8<S>,
    t: f32x8<S>,
) -> f32x8<S> {
    let mt = 1.0 - t;
    // Expands to the Bernstein form:
    // p0*mt^3 + 3*p1*mt^2*t + 3*p2*mt*t^2 + p3*t^3.
    let im0 = p0 * mt * mt * mt;
    let im1 = p1 * mt * mt * 3.0;
    let im2 = p2 * mt * 3.0;
    let im3 = p3.madd(t, im2) * t;

    (im1 + im3).madd(t, im0)
}
349
#[inline(always)]
/// Sample the cubic `c` at `2 * n + 1` uniformly spaced parameters, storing
/// samples at even step indices in `result.even_pts` (quad endpoints) and
/// odd step indices in `result.odd_pts` (quad midpoints).
fn eval_cubics_simd<S: Simd>(simd: S, c: &CubicBez, n: usize, result: &mut FlattenCtx) {
    result.n_quads = n;
    // Two samples per quad, so t advances by 0.5 / n per step.
    let dt = 0.5 / n as f32;

    // Pack the control points as interleaved (x, y) pairs, two points per
    // f32x4.
    let p0p1 = f32x4::from_slice(
        simd,
        &[c.p0.x as f32, c.p0.y as f32, c.p1.x as f32, c.p1.y as f32],
    );
    let p2p3 = f32x4::from_slice(
        simd,
        &[c.p2.x as f32, c.p2.y as f32, c.p3.x as f32, c.p3.y as f32],
    );

    // Split a packed pair-of-points vector into two f32x8s, each
    // broadcasting one point across all four lane pairs (points are moved
    // around as f64s since an (x, y) pair is 8 bytes).
    let split_single = |input: f32x4<S>| {
        let t1 = simd.reinterpret_f64_f32x4(input);
        let p0 = simd.zip_low_f64x2(t1, t1);
        let p1 = simd.zip_high_f64x2(t1, t1);

        let p0 = simd.reinterpret_f32_f64x2(p0);
        let p1 = simd.reinterpret_f32_f64x2(p1);

        (f32x8::block_splat(p0), f32x8::block_splat(p1))
    };

    let (p0_128, p1_128) = split_single(p0p1);
    let (p2_128, p3_128) = split_single(p2p3);

    // Lane order [0, 0, 2, 2, 1, 1, 3, 3]: the low half evaluates even step
    // indices and the high half odd ones, so one `split_f32x8` below stores
    // straight into the even/odd arrays.
    let iota = f32x8::from_slice(simd, &[0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0]);
    let step = iota * dt;
    let mut t = step;
    let t_inc = f32x8::splat(simd, 4.0 * dt);

    let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.even_pts);
    let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.odd_pts);

    // Each iteration evaluates 4 samples: 2 even + 2 odd.
    for i in 0..n.div_ceil(2) {
        let evaluated = eval_simd(p0_128, p1_128, p2_128, p3_128, t);
        let (low, high) = simd.split_f32x8(evaluated);

        even_pts[i * 4..][..4].copy_from_slice(low.as_slice());
        odd_pts[i * 4..][..4].copy_from_slice(high.as_slice());

        t += t_inc;
    }

    // Write the exact curve endpoint (point index n), plus 3 padding copies
    // into the slack region so later 8-float reads near the end stay in
    // bounds.
    even_pts[n * 2..][..8].copy_from_slice(p3_128.as_slice());
}
399
#[inline(always)]
/// For each of the `ctx.n_quads` quadratics produced by `eval_cubics_simd`,
/// reconstruct the quad's control point from the sampled midpoint and run a
/// vectorized analogue of the scalar `estimate_subdiv`, four quads per
/// iteration. Results land in the parallel arrays
/// `ctx.{a0, da, u0, uscale, val}`; `ctx.odd_pts` is overwritten with the
/// derived control points.
fn estimate_subdiv_simd<S: Simd>(simd: S, sqrt_tol: f32, ctx: &mut FlattenCtx) {
    let n = ctx.n_quads;

    let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.even_pts);
    let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.odd_pts);

    for i in 0..n.div_ceil(4) {
        // p0/p2: start/end points of 4 consecutive quads (p2 is even_pts
        // shifted by one point, i.e. 2 floats); p_onehalf: their midpoints.
        let p0 = f32x8::from_slice(simd, &even_pts[i * 8..][..8]);
        let p_onehalf = f32x8::from_slice(simd, &odd_pts[i * 8..][..8]);
        let p2 = f32x8::from_slice(simd, &even_pts[(i * 8 + 2)..][..8]);
        // Control point from the midpoint sample: p1 = 2*m - (p0 + p2)/2.
        let x = p0 * -0.5;
        let x1 = p_onehalf.madd(2.0, x);
        let p1 = p2.madd(-0.5, x1);

        odd_pts[(i * 8)..][..8].copy_from_slice(p1.as_slice());

        let d01 = p1 - p0;
        let d12 = p2 - p1;
        // Deinterleave (x, y) pairs into separate x and y vectors.
        let d01x = simd.unzip_low_f32x8(d01, d01);
        let d01y = simd.unzip_high_f32x8(d01, d01);
        let d12x = simd.unzip_low_f32x8(d12, d12);
        let d12y = simd.unzip_high_f32x8(d12, d12);
        let ddx = d01x - d12x;
        let ddy = d01y - d12y;
        // d02 = d01 + d12 = p2 - p0.
        let d02x = d01x + d12x;
        let d02y = d01y + d12y;
        // cross = d02.x * dd.y - d02.y * dd.x (matches the scalar
        // `(p2 - p0).cross(dd)`).
        let cross = ddx.madd(-d02y, d02x * ddy);

        // Compute the parabola endpoint coordinates x0 = d01.dot(dd)/cross
        // and x2 = d12.dot(dd)/cross for all 4 quads, one endpoint per
        // f32x8 half.
        let x0_x2_a = {
            let (d01x_low, _) = simd.split_f32x8(d01x);
            let (d12x_low, _) = simd.split_f32x8(d12x);

            simd.combine_f32x4(d12x_low, d01x_low) * ddx
        };
        let temp1 = {
            let (d12y_low, _) = simd.split_f32x8(d12y);
            let (d01y_low, _) = simd.split_f32x8(d01y);

            simd.combine_f32x4(d12y_low, d01y_low)
        };
        let x0_x2_num = temp1.madd(ddy, x0_x2_a);
        let x0_x2 = x0_x2_num / cross;
        let (ddx_low, _) = simd.split_f32x8(ddx);
        let (ddy_low, _) = simd.split_f32x8(ddy);
        let dd_hypot = ddy_low.madd(ddy_low, ddx_low * ddx_low).sqrt();
        let (x0, x2) = simd.split_f32x8(x0_x2);
        // scale = |cross / (|dd| * (x2 - x0))|, as in the scalar version.
        let scale_denom = dd_hypot * (x2 - x0);
        let (temp2, _) = simd.split_f32x8(cross);
        let scale = (temp2 / scale_denom).abs();
        let a0_a2 = approx_parabola_integral_simd(x0_x2);
        let (a0, a2) = simd.split_f32x8(a0_a2);
        let da = a2 - a0;
        let da_abs = da.abs();
        let sqrt_scale = scale.sqrt();
        // Sign test via OR of the sign bits: mask is true when both x0 and
        // x2 are non-negative. NOTE(review): the scalar code compares
        // `x0.signum() == x2.signum()`, which also treats "both negative"
        // as the non-cusp case — presumably that combination cannot occur
        // here; TODO confirm.
        let temp3 = simd.or_i32x4(x0.bitcast(), x2.bitcast());
        let mask = simd.simd_ge_i32x4(temp3, i32x4::splat(simd, 0));
        let noncusp = da_abs * sqrt_scale;
        // Cusp path (endpoints straddle the maximum-curvature point).
        let xmin = sqrt_tol / sqrt_scale;
        let approxint = approx_parabola_integral_simd_x4(xmin);
        let cusp = (sqrt_tol * da_abs) / approxint;
        let val_raw = simd.select_f32x4(mask, noncusp, cusp);
        // Degenerate quads produce non-finite values; treat them as needing
        // no subdivision (matches the scalar `scale.is_finite()` guard).
        let finite_mask = is_finite_simd(val_raw);
        let val = simd.select_f32x4(finite_mask, val_raw, f32x4::splat(simd, 0.0));
        let u0_u2 = approx_parabola_inv_integral_simd(a0_a2);
        let (u0, u2) = simd.split_f32x8(u0_u2);
        let uscale_a = u2 - u0;
        let uscale = 1.0 / uscale_a;

        ctx.a0[i * 4..][..4].copy_from_slice(a0.as_slice());
        ctx.da[i * 4..][..4].copy_from_slice(da.as_slice());
        ctx.u0[i * 4..][..4].copy_from_slice(u0.as_slice());
        ctx.uscale[i * 4..][..4].copy_from_slice(uscale.as_slice());
        ctx.val[i * 4..][..4].copy_from_slice(val.as_slice());
    }
}
478
#[inline(always)]
/// Evaluate quad `i` (endpoints in `ctx.even_pts`, control point in
/// `ctx.odd_pts`) at `n` successive subdivision fractions starting at `x0`
/// with spacing `dx`, writing the resulting points into
/// `ctx.flattened_cubics` beginning at `start_idx`.
///
/// Processes 4 points per iteration, so it may write up to 3 points past
/// `start_idx + n`; the output vector carries slack for this.
fn output_lines_simd<S: Simd>(
    simd: S,
    ctx: &mut FlattenCtx,
    i: usize,
    x0: f32,
    dx: f32,
    n: usize,
    start_idx: usize,
) {
    let p0 = pt_splat_simd(simd, ctx.even_pts[i]);
    let p1 = pt_splat_simd(simd, ctx.odd_pts[i]);
    let p2 = pt_splat_simd(simd, ctx.even_pts[i + 1]);

    // Each (x, y) lane pair evaluates one output point: fractions
    // x0, x0 + dx, x0 + 2dx, x0 + 3dx.
    const IOTA2: [f32; 8] = [0., 0., 1., 1., 2., 2., 3., 3.];
    let iota2 = f32x8::from_slice(simd, IOTA2.as_ref());
    let x = iota2.madd(dx, f32x8::splat(simd, x0));
    let da = f32x8::splat(simd, ctx.da[i]);
    // a = a0 + da * x; advances by 4*dx*da per 4-point chunk.
    let mut a = da.madd(x, f32x8::splat(simd, ctx.a0[i]));
    let a_inc = 4.0 * dx * da;
    let uscale = f32x8::splat(simd, ctx.uscale[i]);

    let out: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.flattened_cubics[start_idx..]);

    for j in 0..n.div_ceil(4) {
        // Recover the curve parameter t = (u - u0) * uscale, then evaluate
        // the quad: p = p0*mt^2 + 2*p1*t*mt + p2*t^2.
        let u = approx_parabola_inv_integral_simd(a);
        let t = u.madd(uscale, -ctx.u0[i] * uscale);
        let mt = 1.0 - t;
        let z = p0 * mt * mt;
        let z1 = p1.madd(2.0 * t * mt, z);
        let p = p2.madd(t * t, z1);

        out[j * 8..][..8].copy_from_slice(p.as_slice());

        a += a_inc;
    }
}
516
#[inline(always)]
/// Flatten the cubic `c` to within `accuracy`, writing points into
/// `ctx.flattened_cubics` and returning the number of valid points
/// (the caller iterates indices `1..return`).
fn flatten_cubic_simd<S: Simd>(simd: S, c: CubicBez, ctx: &mut FlattenCtx, accuracy: f32) -> usize {
    let n_quads = estimate_num_quads(c, accuracy);
    eval_cubics_simd(simd, &c, n_quads, ctx);
    // Tolerance remaining after the cubic->quad conversion spent its share.
    let tol = accuracy * (1.0 - TO_QUAD_TOL);
    let sqrt_tol = tol.sqrt();
    estimate_subdiv_simd(simd, sqrt_tol, ctx);
    // Total subdivision "cost" across all quads fixes the segment count n.
    let sum: f32 = ctx.val[..n_quads].iter().sum();
    let n = ((0.5 * sum / sqrt_tol).ceil() as usize).max(1);
    // `+ 4` slack: `output_lines_simd` writes in 4-point chunks and may run
    // past the last requested point.
    ctx.flattened_cubics.resize(n + 4, Point32::default());

    let step = sum / (n as f32);
    let step_recip = 1.0 / step;
    let mut val_sum = 0.0;
    let mut last_n = 0;
    let mut x0base = 0.0;

    // Walk the quads, assigning each one the contiguous run of output
    // indices whose cumulative cost falls inside that quad.
    for i in 0..n_quads {
        let val = ctx.val[i];
        val_sum += val;
        let this_n = val_sum * step_recip;
        let this_n_next = 1.0 + this_n.floor();
        // Number of output points owed by this quad.
        let dn = this_n_next as usize - last_n;
        if dn > 0 {
            let dx = step / val;
            // Fractional offset of the first point within this quad.
            let x0 = x0base * dx;
            output_lines_simd(simd, ctx, i, x0, dx, dn, last_n);
        }
        x0base = this_n_next - this_n;
        last_n = this_n_next as usize;
    }

    // Pin the exact curve endpoint as the final output point.
    ctx.flattened_cubics[n] = ctx.even_pts[n_quads];

    n + 1
}
553
#[inline(always)]
/// Choose how many quadratics to split `c` into so the cubic-to-quadratic
/// approximation error stays within the quad-conversion share of `accuracy`.
fn estimate_num_quads(c: CubicBez, accuracy: f32) -> usize {
    // Portion of the tolerance budget reserved for the cubic->quad step.
    let q_accuracy = (accuracy * TO_QUAD_TOL) as f64;
    let max_hypot2 = 432.0 * q_accuracy * q_accuracy;
    // Error metric based on the difference of the scaled inner control
    // points (3*p1 - p0 vs 3*p2 - p3) — presumably the same bound kurbo
    // uses for cubic-to-quadratic conversion, where the error scales as
    // n^(-6); TODO confirm against kurbo's `CubicBez` docs.
    let p1x2 = c.p1.to_vec2() * 3.0 - c.p0.to_vec2();
    let p2x2 = c.p2.to_vec2() * 3.0 - c.p3.to_vec2();
    let err = (p2x2 - p1x2).hypot2();
    let err_div = err / max_hypot2;

    estimate(err_div)
}
565
/// Fraction of the flattening tolerance budgeted to the cubic-to-quadratic
/// conversion; the remaining `1 - TO_QUAD_TOL` goes to flattening the quads.
const TO_QUAD_TOL: f32 = 0.1;
567
568#[inline(always)]
569fn estimate(err_div: f64) -> usize {
570 const LUT: [f64; MAX_QUADS] = [
580 1.0, 64.0, 729.0, 4096.0, 15625.0, 46656.0, 117649.0, 262144.0, 531441.0, 1000000.0,
581 1771561.0, 2985984.0, 4826809.0, 7529536.0, 11390625.0, 16777216.0,
582 ];
583
584 #[expect(clippy::needless_range_loop, reason = "better clarity")]
585 for i in 0..MAX_QUADS {
586 if err_div <= LUT[i] {
587 return i + 1;
588 }
589 }
590
591 MAX_QUADS
592}
593
#[cfg(test)]
mod tests {
    use crate::flatten_simd::{MAX_QUADS, estimate};

    // Reference implementation: ceil(err_div^(1/6)), clamped to
    // 1..=MAX_QUADS.
    fn old_estimate(err_div: f64) -> usize {
        let n_quads = (err_div.powf(1. / 6.0).ceil() as usize).max(1);
        n_quads.min(MAX_QUADS)
    }

    /// Exhaustively compares the LUT-based `estimate` against the `powf`
    /// reference over every finite `f32` bit pattern. `#[ignore]`d by
    /// default because the full sweep is slow.
    #[test]
    #[ignore]
    fn accuracy() {
        for i in 0..u32::MAX {
            let num = f32::from_bits(i);

            if num.is_finite() {
                assert_eq!(old_estimate(num as f64), estimate(num as f64), "{num}");
            }
        }
    }
}