fn load_8888_tail( tail: usize, data: &[PremultipliedColorU8], r: &mut u16x16, g: &mut u16x16, b: &mut u16x16, a: &mut u16x16, )