// rayon/slice/mod.rs
1//! Parallel iterator types for [slices]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
6//! [slices]: std::slice
7
8mod chunk_by;
9mod chunks;
10mod rchunks;
11mod sort;
12mod windows;
13
14mod test;
15
16use self::sort::par_mergesort;
17use self::sort::par_quicksort;
18use crate::iter::plumbing::*;
19use crate::iter::*;
20use crate::split_producer::*;
21
22use std::cmp::Ordering;
23use std::fmt::{self, Debug};
24
25pub use self::chunk_by::{ChunkBy, ChunkByMut};
26pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
27pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};
28pub use self::windows::{ArrayWindows, Windows};
29
/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let products: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().product::<i32>())
    ///     .collect();
    /// assert_eq!(products, [6, 64, 162]);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let lengths: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split_inclusive(|i| *i == 0)
    ///     .map(|numbers| numbers.len())
    ///     .collect();
    /// assert_eq!(lengths, [4, 4, 3]);
    /// ```
    fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusive {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows::new(window_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over all contiguous array windows of
    /// length `N`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_array_windows().collect();
    /// assert_eq!(vec![&[1, 2], &[2, 3]], windows);
    /// ```
    fn par_array_windows<const N: usize>(&self) -> ArrayWindows<'_, T, N> {
        ArrayWindows::new(self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over the slice producing non-overlapping runs
    /// of elements using the predicate to separate them.
    ///
    /// The predicate is called on pairs of adjacent elements: it is called on
    /// `slice[0]` and `slice[1]`, then on `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &[1]);
    /// assert_eq!(chunks[1], &[2, 2]);
    /// assert_eq!(chunks[2], &[3, 3, 3]);
    /// ```
    fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkBy::new(self.as_parallel_slice(), pred)
    }
}
213
// A slice is trivially its own "parallel slice".
impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}
220
/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over mutable subslices separated by elements
    /// that match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_inclusive_mut(|i| *i == 0)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [0, 3, 2, 1, 0, 8, 4, 2, 9, 6, 3]);
    /// ```
    fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusiveMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is zero.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        // The merge sort only needs a "less than" predicate, derived here
        // from the three-way comparator.
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property accesses or
    /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        // Note: keys are recomputed on every comparison here; see
        // `par_sort_by_cached_key` for the memoized alternative.
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by using
    /// temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
    /// faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
    /// length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will be
                // stable with respect to the original slice. We use `sort_unstable` here because
                // it requires less memory allocation.
                indices.par_sort_unstable();
                for i in 0..len {
                    let mut index = indices[i].1;
                    // If the element that belongs at `i` was already swapped away by an
                    // earlier iteration, follow the chain of recorded positions until we
                    // find where it currently lives.
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

        // Choose the narrowest index type that both fits `len` and actually
        // shrinks the `(K, index)` pair, to minimize the temporary allocation.
        let sz_u8 = size_of::<(K, u8)>();
        let sz_u16 = size_of::<(K, u16)>();
        let sz_u32 = size_of::<(K, u32)>();
        let sz_usize = size_of::<(K, usize)>();

        if sz_u8 < sz_u16 && len <= (u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
    /// equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        // The quicksort only needs a "less than" predicate, derived here
        // from the three-way comparator.
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
    /// of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(m \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
    /// is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        // Note: keys are recomputed on every comparison.
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Returns a parallel iterator over the slice producing non-overlapping mutable
    /// runs of elements using the predicate to separate them.
    ///
    /// The predicate is called on pairs of adjacent elements: it is called on
    /// `slice[0]` and `slice[1]`, then on `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut xs = [1, 2, 2, 3, 3, 3];
    /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &mut [1]);
    /// assert_eq!(chunks[1], &mut [2, 2]);
    /// assert_eq!(chunks[2], &mut [3, 3, 3]);
    /// ```
    fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkByMut::new(self.as_parallel_slice_mut(), pred)
    }
}
768
// A mutable slice is trivially its own "parallel slice".
impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}
775
// `&[T]` iterates its elements by shared reference.
impl<'data, T: Sync> IntoParallelIterator for &'data [T] {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}
784
// `&Box<[T]>` derefs to the boxed slice and iterates by shared reference.
impl<'data, T: Sync> IntoParallelIterator for &'data Box<[T]> {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}
793
// `&mut [T]` iterates its elements by exclusive (mutable) reference.
impl<'data, T: Send> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}
802
// `&mut Box<[T]>` derefs to the boxed slice and iterates by mutable reference.
impl<'data, T: Send> IntoParallelIterator for &'data mut Box<[T]> {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}
811
/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T> {
    // The slice whose elements are yielded by reference.
    slice: &'data [T],
}
817
818impl<T> Clone for Iter<'_, T> {
819 fn clone(&self) -> Self {
820 Iter { ..*self }
821 }
822}
823
impl<'data, T: Sync> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Slices always know their length, so delegate to the indexed bridge.
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        // The exact length is always available for a slice.
        Some(self.len())
    }
}
838
impl<T: Sync> IndexedParallelIterator for Iter<'_, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    // Exact number of items: the slice length.
    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        // Hand the slice off to a splittable producer for work division.
        callback.callback(IterProducer { slice: self.slice })
    }
}
858
// Splittable producer backing `Iter`; each split borrows half the slice.
struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}
862
863impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
864 type Item = &'data T;
865 type IntoIter = ::std::slice::Iter<'data, T>;
866
867 fn into_iter(self) -> Self::IntoIter {
868 self.slice.iter()
869 }
870
871 fn split_at(self, index: usize) -> (Self, Self) {
872 let (left, right) = self.slice.split_at(index);
873 (IterProducer { slice: left }, IterProducer { slice: right })
874 }
875}
876
/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T> {
    // The slice whose elements are yielded by mutable reference.
    slice: &'data mut [T],
}
882
impl<'data, T: Send> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Slices always know their length, so delegate to the indexed bridge.
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        // The exact length is always available for a slice.
        Some(self.len())
    }
}
897
impl<T: Send> IndexedParallelIterator for IterMut<'_, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    // Exact number of items: the slice length.
    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        // Hand the slice off to a splittable producer for work division.
        callback.callback(IterMutProducer { slice: self.slice })
    }
}
917
// Splittable producer backing `IterMut`; each split borrows a disjoint
// mutable half of the slice.
struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}
921
922impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
923 type Item = &'data mut T;
924 type IntoIter = ::std::slice::IterMut<'data, T>;
925
926 fn into_iter(self) -> Self::IntoIter {
927 self.slice.iter_mut()
928 }
929
930 fn split_at(self, index: usize) -> (Self, Self) {
931 let (left, right) = self.slice.split_at_mut(index);
932 (
933 IterMutProducer { slice: left },
934 IterMutProducer { slice: right },
935 )
936 }
937}
938
/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    // The full slice being split.
    slice: &'data [T],
    // Predicate identifying separator elements; separators are not yielded.
    separator: P,
}
944
945impl<T, P: Clone> Clone for Split<'_, T, P> {
946 fn clone(&self) -> Self {
947 Split {
948 separator: self.separator.clone(),
949 ..*self
950 }
951 }
952}
953
954impl<T: Debug, P> Debug for Split<'_, T, P> {
955 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
956 f.debug_struct("Split").field("slice", &self.slice).finish()
957 }
958}
959
impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Separator positions are not known in advance, so this is driven as
        // an unindexed split producer that searches for separators lazily.
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
975
/// Parallel iterator over slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusive<'data, T, P> {
    // The full slice being split.
    slice: &'data [T],
    // Predicate identifying separator elements; each separator is kept as
    // the last element of the subslice it terminates.
    separator: P,
}
982
983impl<T, P: Clone> Clone for SplitInclusive<'_, T, P> {
984 fn clone(&self) -> Self {
985 SplitInclusive {
986 separator: self.separator.clone(),
987 ..*self
988 }
989 }
990}
991
992impl<T: Debug, P> Debug for SplitInclusive<'_, T, P> {
993 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
994 f.debug_struct("SplitInclusive")
995 .field("slice", &self.slice)
996 .finish()
997 }
998}
999
impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Separator positions are not known in advance, so this is driven as
        // an unindexed split producer; `new_incl` keeps each separator
        // attached to the subslice it terminates.
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
1015
1016/// Implement support for `SplitProducer`.
1017impl<T, P> Fissile<P> for &[T]
1018where
1019 P: Fn(&T) -> bool,
1020{
1021 fn length(&self) -> usize {
1022 self.len()
1023 }
1024
1025 fn midpoint(&self, end: usize) -> usize {
1026 end / 2
1027 }
1028
1029 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1030 self[start..end].iter().position(separator)
1031 }
1032
1033 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1034 self[..end].iter().rposition(separator)
1035 }
1036
1037 fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1038 if INCL {
1039 // include the separator in the left side
1040 self.split_at(index + 1)
1041 } else {
1042 let (left, right) = self.split_at(index);
1043 (left, &right[1..]) // skip the separator
1044 }
1045 }
1046
1047 fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1048 where
1049 F: Folder<Self>,
1050 Self: Send,
1051 {
1052 if INCL {
1053 debug_assert!(!skip_last);
1054 folder.consume_iter(self.split_inclusive(separator))
1055 } else {
1056 let mut split = self.split(separator);
1057 if skip_last {
1058 split.next_back();
1059 }
1060 folder.consume_iter(split)
1061 }
1062 }
1063}
1064
/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    // The full mutable slice being split.
    slice: &'data mut [T],
    // Predicate identifying separator elements; separators are not yielded.
    separator: P,
}
1070
1071impl<T: Debug, P> Debug for SplitMut<'_, T, P> {
1072 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1073 f.debug_struct("SplitMut")
1074 .field("slice", &self.slice)
1075 .finish()
1076 }
1077}
1078
impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Separator positions are not known in advance, so this is driven as
        // an unindexed split producer that searches for separators lazily.
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
1094
/// Parallel iterator over mutable slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusiveMut<'data, T, P> {
    // The full mutable slice being split.
    slice: &'data mut [T],
    // Predicate identifying separator elements; each separator is kept as
    // the last element of the subslice it terminates.
    separator: P,
}
1101
1102impl<T: Debug, P> Debug for SplitInclusiveMut<'_, T, P> {
1103 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1104 f.debug_struct("SplitInclusiveMut")
1105 .field("slice", &self.slice)
1106 .finish()
1107 }
1108}
1109
impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Separator positions are not known in advance, so this is driven as
        // an unindexed split producer; `new_incl` keeps each separator
        // attached to the subslice it terminates.
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
1125
1126/// Implement support for `SplitProducer`.
1127impl<T, P> Fissile<P> for &mut [T]
1128where
1129 P: Fn(&T) -> bool,
1130{
1131 fn length(&self) -> usize {
1132 self.len()
1133 }
1134
1135 fn midpoint(&self, end: usize) -> usize {
1136 end / 2
1137 }
1138
1139 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1140 self[start..end].iter().position(separator)
1141 }
1142
1143 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1144 self[..end].iter().rposition(separator)
1145 }
1146
1147 fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1148 if INCL {
1149 // include the separator in the left side
1150 self.split_at_mut(index + 1)
1151 } else {
1152 let (left, right) = self.split_at_mut(index);
1153 (left, &mut right[1..]) // skip the separator
1154 }
1155 }
1156
1157 fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1158 where
1159 F: Folder<Self>,
1160 Self: Send,
1161 {
1162 if INCL {
1163 debug_assert!(!skip_last);
1164 folder.consume_iter(self.split_inclusive_mut(separator))
1165 } else {
1166 let mut split = self.split_mut(separator);
1167 if skip_last {
1168 split.next_back();
1169 }
1170 folder.consume_iter(split)
1171 }
1172 }
1173}