fn CostComputation<T: SliceWrapper<Mem256i>>( depth_histo: &mut [u32; 18], nnz_data: &T, nnz: usize, _total_count: floatX, log2total: floatX, ) -> floatX