// Compile-time `usize` constant fixed at 4. Judging by the name, this is the
// number of chunks grouped into one block of a "fast loop".
// NOTE(review): the code that consumes this constant is not visible here;
// confirm the chunk size and any derived buffer/stride math before changing it.
const CHUNKS_PER_FAST_LOOP_BLOCK: usize = 4;