1use crate::flatten::TOL_2;
9#[cfg(not(feature = "std"))]
10use crate::kurbo::common::FloatFuncs as _;
11use crate::kurbo::{CubicBez, ParamCurve, PathEl, Point, QuadBez};
12use alloc::vec;
13use alloc::vec::Vec;
14use bytemuck::{Pod, Zeroable};
15use fearless_simd::*;
16
17pub(crate) trait Callback {
21 fn callback(&mut self, el: PathEl);
22}
23
24pub(crate) fn flatten<S: Simd>(
29 simd: S,
30 path: impl IntoIterator<Item = PathEl>,
31 tolerance: f64,
32 callback: &mut impl Callback,
33 flatten_ctx: &mut FlattenCtx,
34) {
35 let mut flattened_cubics = vec![];
36
37 let sqrt_tol = tolerance.sqrt();
38 let mut last_pt = None;
39
40 for el in path {
41 match el {
42 PathEl::MoveTo(p) => {
43 last_pt = Some(p);
44 callback.callback(PathEl::MoveTo(p));
45 }
46 PathEl::LineTo(p) => {
47 last_pt = Some(p);
48 callback.callback(PathEl::LineTo(p));
49 }
50 PathEl::QuadTo(p1, p2) => {
51 if let Some(p0) = last_pt {
52 if f64::max((p1 - p0).hypot2(), (p1 - p2).hypot2()) <= 4. * TOL_2 {
71 callback.callback(PathEl::LineTo(p2));
72 } else {
73 let q = QuadBez::new(p0, p1, p2);
74 let params = q.estimate_subdiv(sqrt_tol);
75 let n = ((0.5 * params.val / sqrt_tol).ceil() as usize).max(1);
76 let step = 1.0 / (n as f64);
77 for i in 1..n {
78 let u = (i as f64) * step;
79 let t = q.determine_subdiv_t(¶ms, u);
80 let p = q.eval(t);
81 callback.callback(PathEl::LineTo(p));
82 }
83 callback.callback(PathEl::LineTo(p2));
84 }
85 }
86 last_pt = Some(p2);
87 }
88 PathEl::CurveTo(p1, p2, p3) => {
89 if let Some(p0) = last_pt {
90 if f64::max((p0 - p1).hypot2(), (p3 - p2).hypot2()) <= 16. / 9. * TOL_2 {
111 callback.callback(PathEl::LineTo(p3));
112 } else {
113 let c = CubicBez::new(p0, p1, p2, p3);
114 let max = simd.vectorize(
115 #[inline(always)]
116 || {
117 flatten_cubic_simd(
118 simd,
119 c,
120 flatten_ctx,
121 tolerance as f32,
122 &mut flattened_cubics,
123 )
124 },
125 );
126
127 for p in &flattened_cubics[1..max] {
128 callback.callback(PathEl::LineTo(Point::new(p.x as f64, p.y as f64)));
129 }
130 }
131 }
132 last_pt = Some(p3);
133 }
134 PathEl::ClosePath => {
135 last_pt = None;
136 callback.callback(PathEl::ClosePath);
137 }
138 }
139 }
140}
141
142fn approx_parabola_integral(x: f64) -> f64 {
148 const D: f64 = 0.67;
149 x / (1.0 - D + (D.powi(4) + 0.25 * x * x).sqrt().sqrt())
150}
151
152fn approx_parabola_inv_integral(x: f64) -> f64 {
154 const B: f64 = 0.39;
155 x * (1.0 - B + (B * B + 0.25 * x * x).sqrt())
156}
157
158impl FlattenParamsExt for QuadBez {
159 fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams {
160 let d01 = self.p1 - self.p0;
162 let d12 = self.p2 - self.p1;
163 let dd = d01 - d12;
164 let cross = (self.p2 - self.p0).cross(dd);
165 let x0 = d01.dot(dd) * cross.recip();
166 let x2 = d12.dot(dd) * cross.recip();
167 let scale = (cross / (dd.hypot() * (x2 - x0))).abs();
168
169 let a0 = approx_parabola_integral(x0);
171 let a2 = approx_parabola_integral(x2);
172 let val = if scale.is_finite() {
173 let da = (a2 - a0).abs();
174 let sqrt_scale = scale.sqrt();
175 if x0.signum() == x2.signum() {
176 da * sqrt_scale
177 } else {
178 let xmin = sqrt_tol / sqrt_scale;
180 sqrt_tol * da / approx_parabola_integral(xmin)
181 }
182 } else {
183 0.0
184 };
185 let u0 = approx_parabola_inv_integral(a0);
186 let u2 = approx_parabola_inv_integral(a2);
187 let uscale = (u2 - u0).recip();
188 FlattenParams {
189 a0,
190 a2,
191 u0,
192 uscale,
193 val,
194 }
195 }
196
197 fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64 {
198 let a = params.a0 + (params.a2 - params.a0) * x;
199 let u = approx_parabola_inv_integral(a);
200 (u - params.u0) * params.uscale
201 }
202}
203
204trait FlattenParamsExt {
205 fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams;
206 fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64;
207}
208
209#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
216#[repr(C)]
217struct Point32 {
218 x: f32,
219 y: f32,
220}
221
222struct FlattenParams {
223 a0: f64,
224 a2: f64,
225 u0: f64,
226 uscale: f64,
227 val: f64,
229}
230
231const MAX_QUADS: usize = 16;
235
236#[derive(Default, Debug)]
238pub struct FlattenCtx {
239 even_pts: [Point32; MAX_QUADS + 4],
241 odd_pts: [Point32; MAX_QUADS],
242 a0: [f32; MAX_QUADS],
243 da: [f32; MAX_QUADS],
244 u0: [f32; MAX_QUADS],
245 uscale: [f32; MAX_QUADS],
246 val: [f32; MAX_QUADS],
247 n_quads: usize,
248}
249
250#[inline(always)]
251fn is_finite_simd<S: Simd>(x: f32x4<S>) -> mask32x4<S> {
252 let simd = x.simd;
253
254 let x_abs = x.abs();
255 let reinterpreted = u32x4::from_bytes(x_abs.to_bytes());
256 simd.simd_lt_u32x4(reinterpreted, u32x4::splat(simd, 0x7f80_0000))
257}
258
259#[inline(always)]
260fn approx_parabola_integral_simd<S: Simd>(x: f32x8<S>) -> f32x8<S> {
261 let simd = x.simd;
262
263 const D: f32 = 0.67;
264 const D_POWI_4: f32 = 0.201_511_2;
265
266 let temp1 = f32x8::splat(simd, 0.25).madd(x * x, f32x8::splat(simd, D_POWI_4));
267 let temp2 = temp1.sqrt();
268 let temp3 = temp2.sqrt();
269 let temp4 = f32x8::splat(simd, 1.0) - f32x8::splat(simd, D);
270 let temp5 = temp4 + temp3;
271 x / temp5
272}
273
274#[inline(always)]
275fn approx_parabola_integral_simd_x4<S: Simd>(x: f32x4<S>) -> f32x4<S> {
276 let simd = x.simd;
277
278 const D: f32 = 0.67;
279 const D_POWI_4: f32 = 0.201_511_2;
280
281 let temp1 = f32x4::splat(simd, 0.25).madd(x * x, f32x4::splat(simd, D_POWI_4));
282 let temp2 = temp1.sqrt();
283 let temp3 = temp2.sqrt();
284 let temp4 = f32x4::splat(simd, 1.0) - f32x4::splat(simd, D);
285 let temp5 = temp4 + temp3;
286 x / temp5
287}
288
289#[inline(always)]
290fn approx_parabola_inv_integral_simd<S: Simd>(x: f32x8<S>) -> f32x8<S> {
291 let simd = x.simd;
292
293 const B: f32 = 0.39;
294 const ONE_MINUS_B: f32 = 1.0 - B;
295
296 let temp1 = f32x8::splat(simd, B * B);
297 let temp2 = f32x8::splat(simd, 0.25).madd(x * x, temp1);
298 let temp3 = temp2.sqrt();
299 let temp4 = f32x8::splat(simd, ONE_MINUS_B) + temp3;
300
301 x * temp4
302}
303
304#[inline(always)]
305fn pt_splat_simd<S: Simd>(simd: S, pt: Point32) -> f32x8<S> {
306 let p_f64: f64 = bytemuck::cast(pt);
307 simd.reinterpret_f32_f64x4(f64x4::splat(simd, p_f64))
308}
309
310#[inline(always)]
311fn eval_simd<S: Simd>(
312 p0: f32x8<S>,
313 p1: f32x8<S>,
314 p2: f32x8<S>,
315 p3: f32x8<S>,
316 t: f32x8<S>,
317) -> f32x8<S> {
318 let mt = 1.0 - t;
319 let im0 = p0 * mt * mt * mt;
320 let im1 = p1 * mt * mt * 3.0;
321 let im2 = p2 * mt * 3.0;
322 let im3 = p3.madd(t, im2) * t;
323
324 (im1 + im3).madd(t, im0)
325}
326
327#[inline(always)]
328fn eval_cubics_simd<S: Simd>(simd: S, c: &CubicBez, n: usize, result: &mut FlattenCtx) {
329 result.n_quads = n;
330 let dt = 0.5 / n as f32;
331
332 let p0p1 = f32x4::from_slice(
334 simd,
335 &[c.p0.x as f32, c.p0.y as f32, c.p1.x as f32, c.p1.y as f32],
336 );
337 let p2p3 = f32x4::from_slice(
338 simd,
339 &[c.p2.x as f32, c.p2.y as f32, c.p3.x as f32, c.p3.y as f32],
340 );
341
342 let split_single = |input: f32x4<S>| {
343 let t1 = simd.reinterpret_f64_f32x4(input);
344 let p0 = simd.zip_low_f64x2(t1, t1);
345 let p1 = simd.zip_high_f64x2(t1, t1);
346
347 let p0 = simd.reinterpret_f32_f64x2(p0);
348 let p1 = simd.reinterpret_f32_f64x2(p1);
349
350 (f32x8::block_splat(p0), f32x8::block_splat(p1))
351 };
352
353 let (p0_128, p1_128) = split_single(p0p1);
354 let (p2_128, p3_128) = split_single(p2p3);
355
356 let iota = f32x8::from_slice(simd, &[0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0]);
357 let step = iota * dt;
358 let mut t = step;
359 let t_inc = f32x8::splat(simd, 4.0 * dt);
360
361 let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.even_pts);
362 let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.odd_pts);
363
364 for i in 0..n.div_ceil(2) {
365 let evaluated = eval_simd(p0_128, p1_128, p2_128, p3_128, t);
366 let (low, high) = simd.split_f32x8(evaluated);
367
368 even_pts[i * 4..][..4].copy_from_slice(&low.val);
369 odd_pts[i * 4..][..4].copy_from_slice(&high.val);
370
371 t += t_inc;
372 }
373
374 even_pts[n * 2..][..8].copy_from_slice(&p3_128.val);
375}
376
377#[inline(always)]
378fn estimate_subdiv_simd<S: Simd>(simd: S, sqrt_tol: f32, ctx: &mut FlattenCtx) {
379 let n = ctx.n_quads;
380
381 let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.even_pts);
382 let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.odd_pts);
383
384 for i in 0..n.div_ceil(4) {
385 let p0 = f32x8::from_slice(simd, &even_pts[i * 8..][..8]);
386 let p_onehalf = f32x8::from_slice(simd, &odd_pts[i * 8..][..8]);
387 let p2 = f32x8::from_slice(simd, &even_pts[(i * 8 + 2)..][..8]);
388 let x = p0 * -0.5;
389 let x1 = p_onehalf.madd(2.0, x);
390 let p1 = p2.madd(-0.5, x1);
391
392 odd_pts[(i * 8)..][..8].copy_from_slice(&p1.val);
393
394 let d01 = p1 - p0;
395 let d12 = p2 - p1;
396 let d01x = simd.unzip_low_f32x8(d01, d01);
397 let d01y = simd.unzip_high_f32x8(d01, d01);
398 let d12x = simd.unzip_low_f32x8(d12, d12);
399 let d12y = simd.unzip_high_f32x8(d12, d12);
400 let ddx = d01x - d12x;
401 let ddy = d01y - d12y;
402 let d02x = d01x + d12x;
403 let d02y = d01y + d12y;
404 let cross = ddx.madd(-d02y, d02x * ddy);
406
407 let x0_x2_a = {
408 let (d01x_low, _) = simd.split_f32x8(d01x);
409 let (d12x_low, _) = simd.split_f32x8(d12x);
410
411 simd.combine_f32x4(d12x_low, d01x_low) * ddx
412 };
413 let temp1 = {
414 let (d12y_low, _) = simd.split_f32x8(d12y);
415 let (d01y_low, _) = simd.split_f32x8(d01y);
416
417 simd.combine_f32x4(d12y_low, d01y_low)
418 };
419 let x0_x2_num = temp1.madd(ddy, x0_x2_a);
420 let x0_x2 = x0_x2_num / cross;
421 let (ddx_low, _) = simd.split_f32x8(ddx);
422 let (ddy_low, _) = simd.split_f32x8(ddy);
423 let dd_hypot = ddy_low.madd(ddy_low, ddx_low * ddx_low).sqrt();
424 let (x0, x2) = simd.split_f32x8(x0_x2);
425 let scale_denom = dd_hypot * (x2 - x0);
426 let (temp2, _) = simd.split_f32x8(cross);
427 let scale = (temp2 / scale_denom).abs();
428 let a0_a2 = approx_parabola_integral_simd(x0_x2);
429 let (a0, a2) = simd.split_f32x8(a0_a2);
430 let da = a2 - a0;
431 let da_abs = da.abs();
432 let sqrt_scale = scale.sqrt();
433 let temp3 = simd.or_i32x4(x0.reinterpret_i32(), x2.reinterpret_i32());
434 let mask = simd.simd_ge_i32x4(temp3, i32x4::splat(simd, 0));
435 let noncusp = da_abs * sqrt_scale;
436 let xmin = sqrt_tol / sqrt_scale;
438 let approxint = approx_parabola_integral_simd_x4(xmin);
439 let cusp = (sqrt_tol * da_abs) / approxint;
440 let val_raw = simd.select_f32x4(mask, noncusp, cusp);
441 let finite_mask = is_finite_simd(val_raw);
442 let val = simd.select_f32x4(finite_mask, val_raw, f32x4::splat(simd, 0.0));
443 let u0_u2 = approx_parabola_inv_integral_simd(a0_a2);
444 let (u0, u2) = simd.split_f32x8(u0_u2);
445 let uscale_a = u2 - u0;
446 let uscale = 1.0 / uscale_a;
447
448 ctx.a0[i * 4..][..4].copy_from_slice(&a0.val);
449 ctx.da[i * 4..][..4].copy_from_slice(&da.val);
450 ctx.u0[i * 4..][..4].copy_from_slice(&u0.val);
451 ctx.uscale[i * 4..][..4].copy_from_slice(&uscale.val);
452 ctx.val[i * 4..][..4].copy_from_slice(&val.val);
453 }
454}
455
456#[inline(always)]
457fn output_lines_simd<S: Simd>(
458 simd: S,
459 ctx: &FlattenCtx,
460 i: usize,
461 x0: f32,
462 dx: f32,
463 n: usize,
464 out: &mut [f32],
465) {
466 let p0 = pt_splat_simd(simd, ctx.even_pts[i]);
467 let p1 = pt_splat_simd(simd, ctx.odd_pts[i]);
468 let p2 = pt_splat_simd(simd, ctx.even_pts[i + 1]);
469
470 const IOTA2: [f32; 8] = [0., 0., 1., 1., 2., 2., 3., 3.];
471 let iota2 = f32x8::from_slice(simd, IOTA2.as_ref());
472 let x = iota2.madd(dx, f32x8::splat(simd, x0));
473 let da = f32x8::splat(simd, ctx.da[i]);
474 let mut a = da.madd(x, f32x8::splat(simd, ctx.a0[i]));
475 let a_inc = 4.0 * dx * da;
476 let uscale = f32x8::splat(simd, ctx.uscale[i]);
477
478 for j in 0..n.div_ceil(4) {
479 let u = approx_parabola_inv_integral_simd(a);
480 let t = u.madd(uscale, -ctx.u0[i] * uscale);
481 let mt = 1.0 - t;
482 let z = p0 * mt * mt;
483 let z1 = p1.madd(2.0 * t * mt, z);
484 let p = p2.madd(t * t, z1);
485
486 out[j * 8..][..8].copy_from_slice(&p.val);
487
488 a += a_inc;
489 }
490}
491
492#[inline(always)]
493fn flatten_cubic_simd<S: Simd>(
494 simd: S,
495 c: CubicBez,
496 ctx: &mut FlattenCtx,
497 accuracy: f32,
498 result: &mut Vec<Point32>,
499) -> usize {
500 let n_quads = estimate_num_quads(c, accuracy);
501 eval_cubics_simd(simd, &c, n_quads, ctx);
502 let tol = accuracy * (1.0 - TO_QUAD_TOL);
503 let sqrt_tol = tol.sqrt();
504 estimate_subdiv_simd(simd, sqrt_tol, ctx);
505 let sum: f32 = ctx.val[..n_quads].iter().sum();
506 let n = ((0.5 * sum / sqrt_tol).ceil() as usize).max(1);
507 result.resize(n + 4, Point32::default());
508
509 let step = sum / (n as f32);
510 let step_recip = 1.0 / step;
511 let mut val_sum = 0.0;
512 let mut last_n = 0;
513 let mut x0base = 0.0;
514
515 for i in 0..n_quads {
516 let val = ctx.val[i];
517 val_sum += val;
518 let this_n = val_sum * step_recip;
519 let this_n_next = 1.0 + this_n.floor();
520 let dn = this_n_next as usize - last_n;
521 if dn > 0 {
522 let dx = step / val;
523 let x0 = x0base * dx;
524 output_lines_simd(
525 simd,
526 ctx,
527 i,
528 x0,
529 dx,
530 dn,
531 bytemuck::cast_slice_mut(&mut result[last_n..]),
532 );
533 }
534 x0base = this_n_next - this_n;
535 last_n = this_n_next as usize;
536 }
537
538 result[n] = ctx.even_pts[n_quads];
539
540 n + 1
541}
542
543#[inline(always)]
544fn estimate_num_quads(c: CubicBez, accuracy: f32) -> usize {
545 let q_accuracy = (accuracy * TO_QUAD_TOL) as f64;
546 let max_hypot2 = 432.0 * q_accuracy * q_accuracy;
547 let p1x2 = c.p1.to_vec2() * 3.0 - c.p0.to_vec2();
548 let p2x2 = c.p2.to_vec2() * 3.0 - c.p3.to_vec2();
549 let err = (p2x2 - p1x2).hypot2();
550 let err_div = err / max_hypot2;
551
552 estimate(err_div)
553}
554
555const TO_QUAD_TOL: f32 = 0.1;
556
557#[inline(always)]
558fn estimate(err_div: f64) -> usize {
559 const LUT: [f64; MAX_QUADS] = [
569 1.0, 64.0, 729.0, 4096.0, 15625.0, 46656.0, 117649.0, 262144.0, 531441.0, 1000000.0,
570 1771561.0, 2985984.0, 4826809.0, 7529536.0, 11390625.0, 16777216.0,
571 ];
572
573 #[expect(clippy::needless_range_loop, reason = "better clarity")]
574 for i in 0..MAX_QUADS {
575 if err_div <= LUT[i] {
576 return i + 1;
577 }
578 }
579
580 MAX_QUADS
581}
582
583#[cfg(test)]
584mod tests {
585 use crate::flatten_simd::{MAX_QUADS, estimate};
586
587 fn old_estimate(err_div: f64) -> usize {
588 let n_quads = (err_div.powf(1. / 6.0).ceil() as usize).max(1);
589 n_quads.min(MAX_QUADS)
590 }
591
592 #[test]
594 #[ignore]
595 fn accuracy() {
596 for i in 0..u32::MAX {
597 let num = f32::from_bits(i);
598
599 if num.is_finite() {
600 assert_eq!(old_estimate(num as f64), estimate(num as f64), "{num}");
601 }
602 }
603 }
604}