Skip to main content

pxfm/
polyeval.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, f_fmlaf};
30use crate::double_double::DoubleDouble;
31use crate::dyadic_float::DyadicFloat128;
32use std::ops::Mul;
33
/// Multiply-add primitive shared by the polynomial evaluators in this module.
///
/// Each implementor supplies `a * b + c` in its own arithmetic; rounding
/// behaviour is whatever the implementing type provides.
pub(crate) trait PolyevalMla {
    /// Returns `a * b + c`.
    fn polyeval_mla(a: Self, b: Self, c: Self) -> Self;
}
37
38impl PolyevalMla for f64 {
39    #[inline(always)]
40    fn polyeval_mla(a: Self, b: Self, c: Self) -> Self {
41        f_fmla(a, b, c)
42    }
43}
44
45impl PolyevalMla for f32 {
46    #[inline(always)]
47    fn polyeval_mla(a: Self, b: Self, c: Self) -> Self {
48        f_fmlaf(a, b, c)
49    }
50}
51
52impl PolyevalMla for DoubleDouble {
53    #[inline(always)]
54    fn polyeval_mla(a: Self, b: Self, c: Self) -> Self {
55        DoubleDouble::mul_add(a, b, c)
56    }
57}
58
59impl PolyevalMla for DyadicFloat128 {
60    #[inline(always)]
61    fn polyeval_mla(a: Self, b: Self, c: Self) -> Self {
62        c.quick_add(&a.quick_mul(&b))
63    }
64}
65
66// impl PolyevalMla for DyadicFloat256 {
67//     #[inline(always)]
68//     fn polyeval_mla(a: Self, b: Self, c: Self) -> Self {
69//         c.quick_add(&a.quick_mul(&b))
70//     }
71// }
72
73#[inline(always)]
74#[allow(clippy::too_many_arguments)]
75pub(crate) fn f_polyeval6<T: PolyevalMla + Copy + Mul<T, Output = T>>(
76    x: T,
77    a0: T,
78    a1: T,
79    a2: T,
80    a3: T,
81    a4: T,
82    a5: T,
83) -> T {
84    let x2 = x * x;
85
86    let u0 = T::polyeval_mla(x, a5, a4);
87    let u1 = T::polyeval_mla(x, a3, a2);
88    let u2 = T::polyeval_mla(x, a1, a0);
89
90    let v0 = T::polyeval_mla(x2, u0, u1);
91
92    T::polyeval_mla(x2, v0, u2)
93}
94
95#[inline(always)]
96#[allow(clippy::too_many_arguments)]
97pub(crate) fn dd_quick_polyeval6(
98    x: DoubleDouble,
99    a0: DoubleDouble,
100    a1: DoubleDouble,
101    a2: DoubleDouble,
102    a3: DoubleDouble,
103    a4: DoubleDouble,
104    a5: DoubleDouble,
105) -> DoubleDouble {
106    let x2 = DoubleDouble::quick_mult(x, x);
107
108    let u0 = DoubleDouble::quick_mul_add(x, a5, a4);
109    let u1 = DoubleDouble::quick_mul_add(x, a3, a2);
110    let u2 = DoubleDouble::quick_mul_add(x, a1, a0);
111
112    let v0 = DoubleDouble::quick_mul_add(x2, u0, u1);
113
114    DoubleDouble::quick_mul_add(x2, v0, u2)
115}
116
117#[inline(always)]
118#[allow(unused)]
119#[allow(clippy::too_many_arguments)]
120pub(crate) fn dd_quick_polyeval6_fma(
121    x: DoubleDouble,
122    a0: DoubleDouble,
123    a1: DoubleDouble,
124    a2: DoubleDouble,
125    a3: DoubleDouble,
126    a4: DoubleDouble,
127    a5: DoubleDouble,
128) -> DoubleDouble {
129    let x2 = DoubleDouble::quick_mult_fma(x, x);
130
131    let u0 = DoubleDouble::quick_mul_add_fma(x, a5, a4);
132    let u1 = DoubleDouble::quick_mul_add_fma(x, a3, a2);
133    let u2 = DoubleDouble::quick_mul_add_fma(x, a1, a0);
134
135    let v0 = DoubleDouble::quick_mul_add_fma(x2, u0, u1);
136
137    DoubleDouble::quick_mul_add_fma(x2, v0, u2)
138}
139
/// Evaluates `a0 + a1·x + … + a5·x⁵` in `f64`, always through `f64::mul_add`.
///
/// Uses the same pairing as `f_polyeval6`.
#[inline(always)]
#[allow(unused)]
#[allow(clippy::too_many_arguments)]
pub(crate) fn d_polyeval6(x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64 {
    let x_sq = x * x;

    // Linear pairs in x.
    let t01 = x.mul_add(a1, a0);
    let t23 = x.mul_add(a3, a2);
    let t45 = x.mul_add(a5, a4);

    // Merge with x².
    let t2_5 = x_sq.mul_add(t45, t23);
    x_sq.mul_add(t2_5, t01)
}
154
155#[inline(always)]
156#[allow(clippy::too_many_arguments)]
157pub(crate) fn f_polyeval9<T: PolyevalMla + Copy + Mul<T, Output = T>>(
158    x: T,
159    a0: T,
160    a1: T,
161    a2: T,
162    a3: T,
163    a4: T,
164    a5: T,
165    a6: T,
166    a7: T,
167    a8: T,
168) -> T {
169    let mut acc = a8;
170    acc = T::polyeval_mla(x, acc, a7);
171    acc = T::polyeval_mla(x, acc, a6);
172    acc = T::polyeval_mla(x, acc, a5);
173    acc = T::polyeval_mla(x, acc, a4);
174    acc = T::polyeval_mla(x, acc, a3);
175    acc = T::polyeval_mla(x, acc, a2);
176    acc = T::polyeval_mla(x, acc, a1);
177    T::polyeval_mla(x, acc, a0)
178}
179
180#[inline(always)]
181#[allow(clippy::too_many_arguments)]
182pub(crate) fn f_estrin_polyeval9<T: PolyevalMla + Copy + Mul<T, Output = T>>(
183    x: T,
184    a0: T,
185    a1: T,
186    a2: T,
187    a3: T,
188    a4: T,
189    a5: T,
190    a6: T,
191    a7: T,
192    a8: T,
193) -> T {
194    let x2 = x * x;
195    let x4 = x2 * x2;
196    let x8 = x4 * x4;
197    let p0 = T::polyeval_mla(x, a1, a0);
198    let p1 = T::polyeval_mla(x, a3, a2);
199    let p2 = T::polyeval_mla(x, a5, a4);
200    let p3 = T::polyeval_mla(x, a7, a6);
201
202    let q0 = T::polyeval_mla(x2, p1, p0);
203    let q1 = T::polyeval_mla(x2, p3, p2);
204    let r0 = T::polyeval_mla(x4, q1, q0);
205    T::polyeval_mla(x8, a8, r0)
206}
207
208#[inline(always)]
209#[allow(clippy::too_many_arguments)]
210pub(crate) fn f_polyeval10<T: PolyevalMla + Copy + Mul<T, Output = T>>(
211    x: T,
212    a0: T,
213    a1: T,
214    a2: T,
215    a3: T,
216    a4: T,
217    a5: T,
218    a6: T,
219    a7: T,
220    a8: T,
221    a9: T,
222) -> T {
223    let x2 = x * x;
224    let x4 = x2 * x2;
225    let x8 = x4 * x4;
226
227    let p0 = T::polyeval_mla(x, a1, a0);
228    let p1 = T::polyeval_mla(x, a3, a2);
229    let p2 = T::polyeval_mla(x, a5, a4);
230    let p3 = T::polyeval_mla(x, a7, a6);
231    let p4 = T::polyeval_mla(x, a9, a8);
232
233    let q0 = T::polyeval_mla(x2, p1, p0);
234    let q1 = T::polyeval_mla(x2, p3, p2);
235
236    let r0 = T::polyeval_mla(x4, q1, q0);
237    T::polyeval_mla(x8, p4, r0)
238}
239
240#[inline(always)]
241#[allow(clippy::too_many_arguments)]
242pub(crate) fn dd_quick_polyeval10(
243    x: DoubleDouble,
244    a0: DoubleDouble,
245    a1: DoubleDouble,
246    a2: DoubleDouble,
247    a3: DoubleDouble,
248    a4: DoubleDouble,
249    a5: DoubleDouble,
250    a6: DoubleDouble,
251    a7: DoubleDouble,
252    a8: DoubleDouble,
253    a9: DoubleDouble,
254) -> DoubleDouble {
255    let x2 = DoubleDouble::quick_mult(x, x);
256    let x4 = DoubleDouble::quick_mult(x2, x2);
257    let x8 = DoubleDouble::quick_mult(x4, x4);
258
259    let p0 = DoubleDouble::quick_mul_add(x, a1, a0);
260    let p1 = DoubleDouble::quick_mul_add(x, a3, a2);
261    let p2 = DoubleDouble::quick_mul_add(x, a5, a4);
262    let p3 = DoubleDouble::quick_mul_add(x, a7, a6);
263    let p4 = DoubleDouble::quick_mul_add(x, a9, a8);
264
265    let q0 = DoubleDouble::quick_mul_add(x2, p1, p0);
266    let q1 = DoubleDouble::quick_mul_add(x2, p3, p2);
267
268    let r0 = DoubleDouble::quick_mul_add(x4, q1, q0);
269    DoubleDouble::quick_mul_add(x8, p4, r0)
270}
271
272#[inline(always)]
273#[allow(unused)]
274#[allow(clippy::too_many_arguments)]
275pub(crate) fn dd_quick_polyeval10_fma(
276    x: DoubleDouble,
277    a0: DoubleDouble,
278    a1: DoubleDouble,
279    a2: DoubleDouble,
280    a3: DoubleDouble,
281    a4: DoubleDouble,
282    a5: DoubleDouble,
283    a6: DoubleDouble,
284    a7: DoubleDouble,
285    a8: DoubleDouble,
286    a9: DoubleDouble,
287) -> DoubleDouble {
288    let x2 = DoubleDouble::quick_mult_fma(x, x);
289    let x4 = DoubleDouble::quick_mult_fma(x2, x2);
290    let x8 = DoubleDouble::quick_mult_fma(x4, x4);
291
292    let p0 = DoubleDouble::quick_mul_add_fma(x, a1, a0);
293    let p1 = DoubleDouble::quick_mul_add_fma(x, a3, a2);
294    let p2 = DoubleDouble::quick_mul_add_fma(x, a5, a4);
295    let p3 = DoubleDouble::quick_mul_add_fma(x, a7, a6);
296    let p4 = DoubleDouble::quick_mul_add_fma(x, a9, a8);
297
298    let q0 = DoubleDouble::quick_mul_add_fma(x2, p1, p0);
299    let q1 = DoubleDouble::quick_mul_add_fma(x2, p3, p2);
300
301    let r0 = DoubleDouble::quick_mul_add_fma(x4, q1, q0);
302    DoubleDouble::quick_mul_add_fma(x8, p4, r0)
303}
304
305#[inline(always)]
306#[allow(clippy::too_many_arguments)]
307pub(crate) fn f_polyeval11<T: PolyevalMla + Copy + Mul<T, Output = T>>(
308    x: T,
309    a0: T,
310    a1: T,
311    a2: T,
312    a3: T,
313    a4: T,
314    a5: T,
315    a6: T,
316    a7: T,
317    a8: T,
318    a9: T,
319    a10: T,
320) -> T {
321    let x2 = x * x;
322    let x4 = x2 * x2;
323    let x8 = x4 * x4;
324
325    let q0 = T::polyeval_mla(x, a1, a0);
326    let q1 = T::polyeval_mla(x, a3, a2);
327    let q2 = T::polyeval_mla(x, a5, a4);
328    let q3 = T::polyeval_mla(x, a7, a6);
329    let q4 = T::polyeval_mla(x, a9, a8);
330
331    let r0 = T::polyeval_mla(x2, q1, q0);
332    let r1 = T::polyeval_mla(x2, q3, q2);
333
334    let s0 = T::polyeval_mla(x4, r1, r0);
335    let s1 = T::polyeval_mla(x2, a10, q4);
336    T::polyeval_mla(x8, s1, s0)
337}
338
339#[inline(always)]
340pub(crate) fn f_polyeval3<T: PolyevalMla + Copy>(x: T, a0: T, a1: T, a2: T) -> T {
341    T::polyeval_mla(x, T::polyeval_mla(x, a2, a1), a0)
342}
343
/// Evaluates `a0 + a1·x + a2·x²` in `f64` via two `f64::mul_add` calls.
#[inline(always)]
#[allow(unused)]
pub(crate) fn d_polyeval3(x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
    // Inner step: a1 + a2·x.
    let inner = x.mul_add(a2, a1);
    x.mul_add(inner, a0)
}
349
350#[inline(always)]
351#[allow(clippy::too_many_arguments)]
352pub(crate) fn f_polyeval4<T: PolyevalMla + Copy>(x: T, a0: T, a1: T, a2: T, a3: T) -> T {
353    let t2 = T::polyeval_mla(x, a3, a2);
354    let t5 = T::polyeval_mla(x, t2, a1);
355    T::polyeval_mla(x, t5, a0)
356}
357
/// Evaluates `a0 + a1·x + a2·x² + a3·x³` in `f64` with Horner's scheme,
/// using `f64::mul_add` for every step.
#[inline(always)]
#[allow(unused)]
#[allow(clippy::too_many_arguments)]
pub(crate) fn d_polyeval4(x: f64, a0: f64, a1: f64, a2: f64, a3: f64) -> f64 {
    // Start from the highest coefficient and fold downwards.
    let mut acc = x.mul_add(a3, a2);
    acc = x.mul_add(acc, a1);
    x.mul_add(acc, a0)
}
366
367#[inline(always)]
368#[allow(clippy::too_many_arguments)]
369pub(crate) fn f_estrin_polyeval4<T: PolyevalMla + Copy + Mul<T, Output = T>>(
370    x: T,
371    a0: T,
372    a1: T,
373    a2: T,
374    a3: T,
375) -> T {
376    let x2 = x * x;
377
378    let p01 = T::polyeval_mla(x, a1, a0);
379    let p23 = T::polyeval_mla(x, a3, a2);
380
381    T::polyeval_mla(x2, p23, p01)
382}
383
384#[inline(always)]
385#[allow(clippy::too_many_arguments)]
386pub(crate) fn f_polyeval13<T: PolyevalMla + Copy + Mul<T, Output = T>>(
387    x: T,
388    a0: T,
389    a1: T,
390    a2: T,
391    a3: T,
392    a4: T,
393    a5: T,
394    a6: T,
395    a7: T,
396    a8: T,
397    a9: T,
398    a10: T,
399    a11: T,
400    a12: T,
401) -> T {
402    let x2 = x * x;
403    let x4 = x2 * x2;
404    let x8 = x4 * x4;
405
406    let t0 = T::polyeval_mla(x, a3, a2);
407    let t1 = T::polyeval_mla(x, a1, a0);
408    let t2 = T::polyeval_mla(x, a7, a6);
409    let t3 = T::polyeval_mla(x, a5, a4);
410    let t4 = T::polyeval_mla(x, a11, a10);
411    let t5 = T::polyeval_mla(x, a9, a8);
412
413    let q0 = T::polyeval_mla(x2, t0, t1);
414    let q1 = T::polyeval_mla(x2, t2, t3);
415
416    let q2 = T::polyeval_mla(x2, t4, t5);
417
418    let q3 = a12;
419
420    let r0 = T::polyeval_mla(x4, q1, q0);
421    let r1 = T::polyeval_mla(x4, q3, q2);
422
423    T::polyeval_mla(x8, r1, r0)
424}
425
426#[inline(always)]
427#[allow(clippy::too_many_arguments)]
428pub(crate) fn f_polyeval12<T: PolyevalMla + Copy + Mul<T, Output = T>>(
429    x: T,
430    a0: T,
431    a1: T,
432    a2: T,
433    a3: T,
434    a4: T,
435    a5: T,
436    a6: T,
437    a7: T,
438    a8: T,
439    a9: T,
440    a10: T,
441    a11: T,
442) -> T {
443    let x2 = x * x;
444    let x4 = x2 * x2;
445    let x8 = x4 * x4;
446
447    let e0 = T::polyeval_mla(x, a1, a0);
448    let e1 = T::polyeval_mla(x, a3, a2);
449    let e2 = T::polyeval_mla(x, a5, a4);
450    let e3 = T::polyeval_mla(x, a7, a6);
451    let e4 = T::polyeval_mla(x, a9, a8);
452    let e5 = T::polyeval_mla(x, a11, a10);
453
454    let f0 = T::polyeval_mla(x2, e1, e0);
455    let f1 = T::polyeval_mla(x2, e3, e2);
456    let f2 = T::polyeval_mla(x2, e5, e4);
457
458    let g0 = T::polyeval_mla(x4, f1, f0);
459
460    T::polyeval_mla(x8, f2, g0)
461}
462
463#[inline(always)]
464#[allow(clippy::too_many_arguments)]
465pub(crate) fn f_polyeval14<T: PolyevalMla + Copy + Mul<T, Output = T>>(
466    x: T,
467    a0: T,
468    a1: T,
469    a2: T,
470    a3: T,
471    a4: T,
472    a5: T,
473    a6: T,
474    a7: T,
475    a8: T,
476    a9: T,
477    a10: T,
478    a11: T,
479    a12: T,
480    a13: T,
481) -> T {
482    let x2 = x * x;
483    let x4 = x2 * x2;
484    let x8 = x4 * x4;
485
486    let g0 = T::polyeval_mla(x, a1, a0);
487    let g1 = T::polyeval_mla(x, a3, a2);
488    let g2 = T::polyeval_mla(x, a5, a4);
489    let g3 = T::polyeval_mla(x, a7, a6);
490    let g4 = T::polyeval_mla(x, a9, a8);
491    let g5 = T::polyeval_mla(x, a11, a10);
492    let g6 = T::polyeval_mla(x, a13, a12);
493
494    let h0 = T::polyeval_mla(x2, g1, g0);
495    let h1 = T::polyeval_mla(x2, g3, g2);
496    let h2 = T::polyeval_mla(x2, g5, g4);
497
498    let q0 = T::polyeval_mla(x4, h1, h0);
499    let q1 = T::polyeval_mla(x4, g6, h2);
500
501    T::polyeval_mla(x8, q1, q0)
502}
503
/// Evaluates `a0 + a1·x + … + a13·x¹³` in `f64`, always through
/// `f64::mul_add`, with the same grouping as `f_polyeval14`.
#[inline(always)]
#[allow(unused)]
#[allow(clippy::too_many_arguments)]
pub(crate) fn d_polyeval14(
    x: f64,
    a0: f64,
    a1: f64,
    a2: f64,
    a3: f64,
    a4: f64,
    a5: f64,
    a6: f64,
    a7: f64,
    a8: f64,
    a9: f64,
    a10: f64,
    a11: f64,
    a12: f64,
    a13: f64,
) -> f64 {
    let x_sq = x * x;
    let x_4 = x_sq * x_sq;
    let x_8 = x_4 * x_4;

    // Linear pairs.
    let t01 = x.mul_add(a1, a0);
    let t23 = x.mul_add(a3, a2);
    let t45 = x.mul_add(a5, a4);
    let t67 = x.mul_add(a7, a6);
    let t89 = x.mul_add(a9, a8);
    let t10_11 = x.mul_add(a11, a10);
    let t12_13 = x.mul_add(a13, a12);

    // Groups of four.
    let t0_3 = x_sq.mul_add(t23, t01);
    let t4_7 = x_sq.mul_add(t67, t45);
    let t8_11 = x_sq.mul_add(t10_11, t89);

    // Lower and upper halves.
    let t0_7 = x_4.mul_add(t4_7, t0_3);
    let t8_13 = x_4.mul_add(t12_13, t8_11);

    x_8.mul_add(t8_13, t0_7)
}
545
546#[inline(always)]
547#[allow(clippy::too_many_arguments)]
548pub(crate) fn f_polyeval7<T: PolyevalMla + Copy>(
549    x: T,
550    a0: T,
551    a1: T,
552    a2: T,
553    a3: T,
554    a4: T,
555    a5: T,
556    a6: T,
557) -> T {
558    let t1 = T::polyeval_mla(x, a6, a5);
559    let t2 = T::polyeval_mla(x, t1, a4);
560    let t3 = T::polyeval_mla(x, t2, a3);
561    let t4 = T::polyeval_mla(x, t3, a2);
562    let t5 = T::polyeval_mla(x, t4, a1);
563    T::polyeval_mla(x, t5, a0)
564}
565
/// Evaluates `a0 + a1·x + … + a6·x⁶` in `f64` with Horner's scheme,
/// using `f64::mul_add` for every step.
#[inline(always)]
#[allow(unused)]
#[allow(clippy::too_many_arguments)]
pub(crate) fn d_polyeval7(
    x: f64,
    a0: f64,
    a1: f64,
    a2: f64,
    a3: f64,
    a4: f64,
    a5: f64,
    a6: f64,
) -> f64 {
    // Running Horner accumulator.
    let mut acc = x.mul_add(a6, a5);
    acc = x.mul_add(acc, a4);
    acc = x.mul_add(acc, a3);
    acc = x.mul_add(acc, a2);
    acc = x.mul_add(acc, a1);
    x.mul_add(acc, a0)
}
586
587#[inline(always)]
588#[allow(clippy::too_many_arguments)]
589pub(crate) fn f_estrin_polyeval7<T: PolyevalMla + Copy + Mul<T, Output = T>>(
590    x: T,
591    a0: T,
592    a1: T,
593    a2: T,
594    a3: T,
595    a4: T,
596    a5: T,
597    a6: T,
598) -> T {
599    let x2 = x * x;
600    let x4 = x2 * x2;
601
602    let b0 = T::polyeval_mla(x, a1, a0);
603    let b1 = T::polyeval_mla(x, a3, a2);
604    let b2 = T::polyeval_mla(x, a5, a4);
605
606    let c0 = T::polyeval_mla(x2, b1, b0);
607    let c1 = T::polyeval_mla(x2, a6, b2);
608
609    T::polyeval_mla(x4, c1, c0)
610}
611
/// Evaluates `a0 + a1·x + … + a6·x⁶` in `f64`, always through
/// `f64::mul_add`, with the same grouping as `f_estrin_polyeval7`.
#[inline(always)]
#[allow(unused)]
#[allow(clippy::too_many_arguments)]
pub(crate) fn d_estrin_polyeval7(
    x: f64,
    a0: f64,
    a1: f64,
    a2: f64,
    a3: f64,
    a4: f64,
    a5: f64,
    a6: f64,
) -> f64 {
    let x_sq = x * x;
    let x_4 = x_sq * x_sq;

    // Linear pairs.
    let t01 = x.mul_add(a1, a0);
    let t23 = x.mul_add(a3, a2);
    let t45 = x.mul_add(a5, a4);

    // Lower four coefficients and upper three.
    let t0_3 = x_sq.mul_add(t23, t01);
    let t4_6 = x_sq.mul_add(a6, t45);

    x_4.mul_add(t4_6, t0_3)
}
637
638#[allow(clippy::too_many_arguments)]
639#[inline(always)]
640pub(crate) fn f_polyeval5<T: PolyevalMla + Copy>(x: T, a0: T, a1: T, a2: T, a3: T, a4: T) -> T {
641    let mut acc = a4;
642    acc = T::polyeval_mla(x, acc, a3);
643    acc = T::polyeval_mla(x, acc, a2);
644    acc = T::polyeval_mla(x, acc, a1);
645    T::polyeval_mla(x, acc, a0)
646}
647
/// Evaluates `a0 + a1·x + a2·x² + a3·x³ + a4·x⁴` in `f64` with Horner's
/// scheme, using `f64::mul_add` for every step.
#[allow(clippy::too_many_arguments)]
#[inline(always)]
#[allow(unused)]
pub(crate) fn d_polyeval5(x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64) -> f64 {
    // Each step folds in the next lower coefficient.
    let h3 = x.mul_add(a4, a3);
    let h2 = x.mul_add(h3, a2);
    let h1 = x.mul_add(h2, a1);
    x.mul_add(h1, a0)
}
658
659#[allow(clippy::too_many_arguments)]
660#[inline(always)]
661pub(crate) fn f_estrin_polyeval5<T: PolyevalMla + Copy + Mul<T, Output = T>>(
662    x: T,
663    a0: T,
664    a1: T,
665    a2: T,
666    a3: T,
667    a4: T,
668) -> T {
669    let x2 = x * x;
670    let p01 = T::polyeval_mla(x, a1, a0);
671    let p23 = T::polyeval_mla(x, a3, a2);
672    let t = T::polyeval_mla(x2, a4, p23);
673    T::polyeval_mla(x2, t, p01)
674}
675
/// Evaluates `a0 + a1·x + a2·x² + a3·x³ + a4·x⁴` in `f64`, always through
/// `f64::mul_add`, with the same grouping as `f_estrin_polyeval5`.
#[allow(clippy::too_many_arguments)]
#[inline(always)]
#[allow(unused)]
pub(crate) fn d_estrin_polyeval5(x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64) -> f64 {
    let x_sq = x * x;

    let t01 = x.mul_add(a1, a0);
    let t23 = x.mul_add(a3, a2);

    // a2 + a3·x + a4·x², then the full sum.
    let t2_4 = x_sq.mul_add(a4, t23);
    x_sq.mul_add(t2_4, t01)
}
686
687#[inline(always)]
688#[allow(clippy::too_many_arguments)]
689pub(crate) fn f_polyeval8<T: PolyevalMla + Copy>(
690    x: T,
691    a0: T,
692    a1: T,
693    a2: T,
694    a3: T,
695    a4: T,
696    a5: T,
697    a6: T,
698    a7: T,
699) -> T {
700    let z0 = T::polyeval_mla(x, a7, a6);
701    let t1 = T::polyeval_mla(x, z0, a5);
702    let t2 = T::polyeval_mla(x, t1, a4);
703    let t3 = T::polyeval_mla(x, t2, a3);
704    let t4 = T::polyeval_mla(x, t3, a2);
705    let t5 = T::polyeval_mla(x, t4, a1);
706    T::polyeval_mla(x, t5, a0)
707}
708
709#[inline(always)]
710#[allow(clippy::too_many_arguments)]
711pub(crate) fn f_estrin_polyeval8<T: PolyevalMla + Copy + Mul<T, Output = T>>(
712    x: T,
713    a0: T,
714    a1: T,
715    a2: T,
716    a3: T,
717    a4: T,
718    a5: T,
719    a6: T,
720    a7: T,
721) -> T {
722    let x2 = x * x;
723    let x4 = x2 * x2;
724
725    let p0 = T::polyeval_mla(x, a1, a0);
726    let p1 = T::polyeval_mla(x, a3, a2);
727    let p2 = T::polyeval_mla(x, a5, a4);
728    let p3 = T::polyeval_mla(x, a7, a6);
729
730    let q0 = T::polyeval_mla(x2, p1, p0);
731    let q1 = T::polyeval_mla(x2, p3, p2);
732
733    T::polyeval_mla(x4, q1, q0)
734}
735
736#[inline(always)]
737#[allow(clippy::too_many_arguments)]
738pub(crate) fn f_polyeval16<T: PolyevalMla + Copy + Mul<T, Output = T>>(
739    x: T,
740    a0: T,
741    a1: T,
742    a2: T,
743    a3: T,
744    a4: T,
745    a5: T,
746    a6: T,
747    a7: T,
748    a8: T,
749    a9: T,
750    a10: T,
751    a11: T,
752    a12: T,
753    a13: T,
754    a14: T,
755    a15: T,
756) -> T {
757    let x2 = x * x;
758    let x4 = x2 * x2;
759    let x8 = x4 * x4;
760
761    let q0 = T::polyeval_mla(x, a1, a0);
762    let q1 = T::polyeval_mla(x, a3, a2);
763    let q2 = T::polyeval_mla(x, a5, a4);
764    let q3 = T::polyeval_mla(x, a7, a6);
765    let q4 = T::polyeval_mla(x, a9, a8);
766    let q5 = T::polyeval_mla(x, a11, a10);
767    let q6 = T::polyeval_mla(x, a13, a12);
768    let q7 = T::polyeval_mla(x, a15, a14);
769
770    let r0 = T::polyeval_mla(x2, q1, q0);
771    let r1 = T::polyeval_mla(x2, q3, q2);
772    let r2 = T::polyeval_mla(x2, q5, q4);
773    let r3 = T::polyeval_mla(x2, q7, q6);
774
775    let s0 = T::polyeval_mla(x4, r1, r0);
776    let s1 = T::polyeval_mla(x4, r3, r2);
777
778    T::polyeval_mla(x8, s1, s0)
779}
780
781#[inline(always)]
782#[allow(clippy::too_many_arguments)]
783pub(crate) fn f_polyeval15<T: PolyevalMla + Copy + Mul<T, Output = T>>(
784    x: T,
785    a0: T,
786    a1: T,
787    a2: T,
788    a3: T,
789    a4: T,
790    a5: T,
791    a6: T,
792    a7: T,
793    a8: T,
794    a9: T,
795    a10: T,
796    a11: T,
797    a12: T,
798    a13: T,
799    a14: T,
800) -> T {
801    let x2 = x * x;
802    let x4 = x2 * x2;
803    let x8 = x4 * x4;
804
805    let e0 = T::polyeval_mla(x, a1, a0);
806    let e1 = T::polyeval_mla(x, a3, a2);
807    let e2 = T::polyeval_mla(x, a5, a4);
808    let e3 = T::polyeval_mla(x, a7, a6);
809    let e4 = T::polyeval_mla(x, a9, a8);
810    let e5 = T::polyeval_mla(x, a11, a10);
811    let e6 = T::polyeval_mla(x, a13, a12);
812
813    // Level 2
814    let f0 = T::polyeval_mla(x2, e1, e0);
815    let f1 = T::polyeval_mla(x2, e3, e2);
816    let f2 = T::polyeval_mla(x2, e5, e4);
817    let f3 = T::polyeval_mla(x2, a14, e6);
818
819    // Level 3
820    let g0 = T::polyeval_mla(x4, f1, f0);
821    let g1 = T::polyeval_mla(x4, f3, f2);
822
823    // Final
824    T::polyeval_mla(x8, g1, g0)
825}
826
827#[inline(always)]
828#[allow(clippy::too_many_arguments)]
829pub(crate) fn f_polyeval18<T: PolyevalMla + Copy + Mul<T, Output = T>>(
830    x: T,
831    a0: T,
832    a1: T,
833    a2: T,
834    a3: T,
835    a4: T,
836    a5: T,
837    a6: T,
838    a7: T,
839    a8: T,
840    a9: T,
841    a10: T,
842    a11: T,
843    a12: T,
844    a13: T,
845    a14: T,
846    a15: T,
847    a16: T,
848    a17: T,
849) -> T {
850    let x2 = x * x;
851    let x4 = x2 * x2;
852    let x8 = x4 * x4;
853    let x16 = x8 * x8;
854
855    let q0 = T::polyeval_mla(x, a1, a0);
856    let q1 = T::polyeval_mla(x, a3, a2);
857    let q2 = T::polyeval_mla(x, a5, a4);
858    let q3 = T::polyeval_mla(x, a7, a6);
859    let q4 = T::polyeval_mla(x, a9, a8);
860    let q5 = T::polyeval_mla(x, a11, a10);
861    let q6 = T::polyeval_mla(x, a13, a12);
862    let q7 = T::polyeval_mla(x, a15, a14);
863    let q8 = T::polyeval_mla(x, a17, a16);
864
865    let r0 = T::polyeval_mla(x2, q1, q0);
866    let r1 = T::polyeval_mla(x2, q3, q2);
867    let r2 = T::polyeval_mla(x2, q5, q4);
868    let r3 = T::polyeval_mla(x2, q7, q6);
869
870    let s0 = T::polyeval_mla(x4, r1, r0);
871    let s1 = T::polyeval_mla(x4, r3, r2);
872
873    let t0 = T::polyeval_mla(x8, s1, s0);
874
875    T::polyeval_mla(x16, q8, t0)
876}
877
878#[inline(always)]
879#[allow(clippy::too_many_arguments)]
880pub(crate) fn f_polyeval19<T: PolyevalMla + Copy + Mul<T, Output = T>>(
881    x: T,
882    a0: T,
883    a1: T,
884    a2: T,
885    a3: T,
886    a4: T,
887    a5: T,
888    a6: T,
889    a7: T,
890    a8: T,
891    a9: T,
892    a10: T,
893    a11: T,
894    a12: T,
895    a13: T,
896    a14: T,
897    a15: T,
898    a16: T,
899    a17: T,
900    a18: T,
901) -> T {
902    let x2 = x * x;
903    let x4 = x2 * x2;
904    let x8 = x4 * x4;
905    let x16 = x8 * x8;
906
907    // Level 0: pairs
908    let e0 = T::polyeval_mla(x, a1, a0); // a0 + a1·x
909    let e1 = T::polyeval_mla(x, a3, a2); // a2 + a3·x
910    let e2 = T::polyeval_mla(x, a5, a4);
911    let e3 = T::polyeval_mla(x, a7, a6);
912    let e4 = T::polyeval_mla(x, a9, a8);
913    let e5 = T::polyeval_mla(x, a11, a10);
914    let e6 = T::polyeval_mla(x, a13, a12);
915    let e7 = T::polyeval_mla(x, a15, a14);
916    let e8 = T::polyeval_mla(x, a17, a16);
917
918    // Level 1: combine with x²
919    let f0 = T::polyeval_mla(x2, e1, e0);
920    let f1 = T::polyeval_mla(x2, e3, e2);
921    let f2 = T::polyeval_mla(x2, e5, e4);
922    let f3 = T::polyeval_mla(x2, e7, e6);
923
924    // Level 2: combine with x⁴
925    let g0 = T::polyeval_mla(x4, f1, f0);
926    let g1 = T::polyeval_mla(x4, f3, f2);
927
928    // Level 3: combine with x⁸
929    let h0 = T::polyeval_mla(x8, g1, g0);
930
931    // Final: combine with x¹⁶
932    let final_poly = T::polyeval_mla(x16, e8, h0);
933
934    // Degree 18: Add a18·x¹⁸
935    // This assumes `x18 = x16 * x2`, since x² already computed
936    let x18 = x16 * x2;
937    T::polyeval_mla(x18, a18, final_poly)
938}
939
940#[inline(always)]
941#[allow(clippy::too_many_arguments)]
942pub(crate) fn f_polyeval22<T: PolyevalMla + Copy + Mul<T, Output = T>>(
943    x: T,
944    a0: T,
945    a1: T,
946    a2: T,
947    a3: T,
948    a4: T,
949    a5: T,
950    a6: T,
951    a7: T,
952    a8: T,
953    a9: T,
954    a10: T,
955    a11: T,
956    a12: T,
957    a13: T,
958    a14: T,
959    a15: T,
960    a16: T,
961    a17: T,
962    a18: T,
963    a19: T,
964    a20: T,
965    a21: T,
966) -> T {
967    let x2 = x * x;
968    let x4 = x2 * x2;
969    let x8 = x4 * x4;
970    let x16 = x8 * x8;
971
972    let p0 = T::polyeval_mla(x, a1, a0); // a1·x + a0
973    let p1 = T::polyeval_mla(x, a3, a2); // a3·x + a2
974    let p2 = T::polyeval_mla(x, a5, a4);
975    let p3 = T::polyeval_mla(x, a7, a6);
976    let p4 = T::polyeval_mla(x, a9, a8);
977    let p5 = T::polyeval_mla(x, a11, a10);
978    let p6 = T::polyeval_mla(x, a13, a12);
979    let p7 = T::polyeval_mla(x, a15, a14);
980    let p8 = T::polyeval_mla(x, a17, a16);
981    let p9 = T::polyeval_mla(x, a19, a18);
982    let p10 = T::polyeval_mla(x, a21, a20);
983
984    let q0 = T::polyeval_mla(x2, p1, p0); // (a3·x + a2)·x² + (a1·x + a0)
985    let q1 = T::polyeval_mla(x2, p3, p2);
986    let q2 = T::polyeval_mla(x2, p5, p4);
987    let q3 = T::polyeval_mla(x2, p7, p6);
988    let q4 = T::polyeval_mla(x2, p9, p8);
989    let r0 = T::polyeval_mla(x4, q1, q0); // q1·x⁴ + q0
990    let r1 = T::polyeval_mla(x4, q3, q2);
991    let s0 = T::polyeval_mla(x8, r1, r0); // r1·x⁸ + r0
992    let r2 = T::polyeval_mla(x4, p10, q4); // p10·x⁴ + q4
993    T::polyeval_mla(x16, r2, s0)
994}
995
996#[inline(always)]
997#[allow(clippy::too_many_arguments)]
998pub(crate) fn f_polyeval24<T: PolyevalMla + Copy + Mul<T, Output = T>>(
999    x: T,
1000    a0: T,
1001    a1: T,
1002    a2: T,
1003    a3: T,
1004    a4: T,
1005    a5: T,
1006    a6: T,
1007    a7: T,
1008    a8: T,
1009    a9: T,
1010    a10: T,
1011    a11: T,
1012    a12: T,
1013    a13: T,
1014    a14: T,
1015    a15: T,
1016    a16: T,
1017    a17: T,
1018    a18: T,
1019    a19: T,
1020    a20: T,
1021    a21: T,
1022    a22: T,
1023    a23: T,
1024) -> T {
1025    let x2 = x * x;
1026    let x4 = x2 * x2;
1027    let x8 = x4 * x4;
1028    let x16 = x8 * x8;
1029
1030    // Group degree 0–1
1031    let e0 = T::polyeval_mla(x, a1, a0);
1032    // Group degree 2–3
1033    let e1 = T::polyeval_mla(x, a3, a2);
1034    // Group degree 4–5
1035    let e2 = T::polyeval_mla(x, a5, a4);
1036    // Group degree 6–7
1037    let e3 = T::polyeval_mla(x, a7, a6);
1038    // Group degree 8–9
1039    let e4 = T::polyeval_mla(x, a9, a8);
1040    // Group degree 10–11
1041    let e5 = T::polyeval_mla(x, a11, a10);
1042    // Group degree 12–13
1043    let e6 = T::polyeval_mla(x, a13, a12);
1044    // Group degree 14–15
1045    let e7 = T::polyeval_mla(x, a15, a14);
1046    // Group degree 16–17
1047    let e8 = T::polyeval_mla(x, a17, a16);
1048    // Group degree 18–19
1049    let e9 = T::polyeval_mla(x, a19, a18);
1050    // Group degree 20–21
1051    let e10 = T::polyeval_mla(x, a21, a20);
1052    // Group degree 22–23
1053    let e11 = T::polyeval_mla(x, a23, a22);
1054
1055    // Now group into x2 terms
1056    let f0 = T::polyeval_mla(x2, e1, e0);
1057    let f1 = T::polyeval_mla(x2, e3, e2);
1058    let f2 = T::polyeval_mla(x2, e5, e4);
1059    let f3 = T::polyeval_mla(x2, e7, e6);
1060    let f4 = T::polyeval_mla(x2, e9, e8);
1061    let f5 = T::polyeval_mla(x2, e11, e10);
1062
1063    // Now group into x4 terms
1064    let g0 = T::polyeval_mla(x4, f1, f0);
1065    let g1 = T::polyeval_mla(x4, f3, f2);
1066    let g2 = T::polyeval_mla(x4, f5, f4);
1067
1068    // Now group into x8 terms
1069    let h0 = T::polyeval_mla(x8, g1, g0);
1070    let h1 = g2;
1071
1072    // Final step (x16 term)
1073    T::polyeval_mla(x16, h1, h0)
1074}