fn round_f32_to_u16( rf: f32x16, gf: f32x16, bf: f32x16, af: f32x16, r: &mut u16x16, g: &mut u16x16, b: &mut u16x16, a: &mut u16x16, )