rayon/slice/
mod.rs

1//! Parallel iterator types for [slices]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
6//! [slices]: std::slice
7
8mod chunk_by;
9mod chunks;
10mod rchunks;
11mod sort;
12mod windows;
13
14mod test;
15
16use self::sort::par_mergesort;
17use self::sort::par_quicksort;
18use crate::iter::plumbing::*;
19use crate::iter::*;
20use crate::split_producer::*;
21
22use std::cmp::Ordering;
23use std::fmt::{self, Debug};
24
25pub use self::chunk_by::{ChunkBy, ChunkByMut};
26pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
27pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};
28pub use self::windows::{ArrayWindows, Windows};
29
30/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let products: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().product::<i32>())
    ///     .collect();
    /// assert_eq!(products, [6, 64, 162]);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        // The iterator borrows the plain slice and owns the predicate.
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let lengths: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split_inclusive(|i| *i == 0)
    ///     .map(|numbers| numbers.len())
    ///     .collect();
    /// assert_eq!(lengths, [4, 4, 3]);
    /// ```
    fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusive {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows::new(window_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over all contiguous array windows of
    /// length `N`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_array_windows().collect();
    /// assert_eq!(vec![&[1, 2], &[2, 3]], windows);
    /// ```
    fn par_array_windows<const N: usize>(&self) -> ArrayWindows<'_, T, N> {
        // The window length is the const generic `N`, checked at compile time.
        ArrayWindows::new(self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        // Reject zero eagerly, matching the contract of `slice::chunks`;
        // `#[track_caller]` reports the caller's location on panic.
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over the slice producing non-overlapping runs
    /// of elements using the predicate to separate them.
    ///
    /// The predicate is called on two elements following themselves,
    /// it means the predicate is called on `slice[0]` and `slice[1]`
    /// then on `slice[1]` and `slice[2]` and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &[1]);
    /// assert_eq!(chunks[1], &[2, 2]);
    /// assert_eq!(chunks[2], &[3, 3, 3]);
    /// ```
    fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkBy::new(self.as_parallel_slice(), pred)
    }
}
213
impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    // A slice is trivially its own parallel-slice representation.
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}
220
221/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        // The iterator borrows the slice mutably and owns the predicate.
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over mutable subslices separated by elements
    /// that match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_inclusive_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [0, 3, 2, 1, 0, 8, 4, 2, 9, 6, 3]);
    /// ```
    fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusiveMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        // Reject zero eagerly, matching the contract of `slice::chunks_mut`;
        // `#[track_caller]` reports the caller's location on panic.
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        // `T::lt` is the "is less" predicate the merge sort expects.
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        // Adapt the three-way comparator into the boolean "is less" form.
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property accesses or
    /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        // Keys are recomputed on every comparison; see `par_sort_by_cached_key`
        // for the memoized variant.
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by using
    /// temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
    /// faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
    /// length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        // Zero or one element is already sorted; skip all allocation.
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                // Compute each key once, in parallel, paired with its index.
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will be
                // stable with respect to the original slice. We use `sort_unstable` here because
                // it requires less memory allocation.
                indices.par_sort_unstable();
                // Apply the sorted permutation to `slice` in place by walking
                // each cycle; after iteration `i`, positions `0..=i` are final.
                for i in 0..len {
                    let mut index = indices[i].1;
                    // Indices below `i` point at slots already finalized, so
                    // chase them forward to the element's current location.
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

        // Choose the narrowest index type whose `(K, index)` pair is actually
        // smaller, to shrink the temporary allocation.
        let sz_u8 = size_of::<(K, u8)>();
        let sz_u16 = size_of::<(K, u16)>();
        let sz_u32 = size_of::<(K, u32)>();
        let sz_usize = size_of::<(K, usize)>();

        if sz_u8 < sz_u16 && len <= (u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        // `T::lt` is the "is less" predicate the quicksort expects.
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
    /// equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        // Adapt the three-way comparator into the boolean "is less" form.
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
    /// of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(m \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
    /// is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        // Keys are recomputed on every comparison.
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Returns a parallel iterator over the slice producing non-overlapping mutable
    /// runs of elements using the predicate to separate them.
    ///
    /// The predicate is called on two elements following themselves,
    /// it means the predicate is called on `slice[0]` and `slice[1]`
    /// then on `slice[1]` and `slice[2]` and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut xs = [1, 2, 2, 3, 3, 3];
    /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &mut [1]);
    /// assert_eq!(chunks[1], &mut [2, 2]);
    /// assert_eq!(chunks[2], &mut [3, 3, 3]);
    /// ```
    fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkByMut::new(self.as_parallel_slice_mut(), pred)
    }
}
768
impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    // A mutable slice is trivially its own parallel-slice representation.
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}
775
776impl<'data, T: Sync> IntoParallelIterator for &'data [T] {
777    type Item = &'data T;
778    type Iter = Iter<'data, T>;
779
780    fn into_par_iter(self) -> Self::Iter {
781        Iter { slice: self }
782    }
783}
784
// `&Box<[T]>` deref-coerces to `&[T]`, so it reuses the same borrowing
// iterator as plain shared slices.
impl<'data, T: Sync> IntoParallelIterator for &'data Box<[T]> {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}
793
// A mutable slice converts into a parallel iterator yielding `&mut T`.
impl<'data, T: Send> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}
802
// `&mut Box<[T]>` deref-coerces to `&mut [T]`, reusing the mutable
// slice iterator.
impl<'data, T: Send> IntoParallelIterator for &'data mut Box<[T]> {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}
811
/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T> {
    // The borrowed slice whose elements are yielded by shared reference.
    slice: &'data [T],
}
817
818impl<T> Clone for Iter<'_, T> {
819    fn clone(&self) -> Self {
820        Iter { ..*self }
821    }
822}
823
impl<'data, T: Sync> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // The length is always known, so even the unindexed entry point can
        // use the indexed `bridge`.
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        // Exact length is always available for a slice.
        Some(self.len())
    }
}
838
impl<T: Sync> IndexedParallelIterator for Iter<'_, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        // Wrap the slice in a splittable producer for the callback.
        callback.callback(IterProducer { slice: self.slice })
    }
}
858
// Splittable producer backing `Iter`; created by `with_producer` above.
struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}
862
863impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
864    type Item = &'data T;
865    type IntoIter = ::std::slice::Iter<'data, T>;
866
867    fn into_iter(self) -> Self::IntoIter {
868        self.slice.iter()
869    }
870
871    fn split_at(self, index: usize) -> (Self, Self) {
872        let (left, right) = self.slice.split_at(index);
873        (IterProducer { slice: left }, IterProducer { slice: right })
874    }
875}
876
/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T> {
    // The borrowed slice whose elements are yielded by mutable reference.
    slice: &'data mut [T],
}
882
impl<'data, T: Send> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // The length is always known, so even the unindexed entry point can
        // use the indexed `bridge`.
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        // Exact length is always available for a slice.
        Some(self.len())
    }
}
897
impl<T: Send> IndexedParallelIterator for IterMut<'_, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        // Wrap the slice in a splittable producer for the callback.
        callback.callback(IterMutProducer { slice: self.slice })
    }
}
917
// Splittable producer backing `IterMut`; created by `with_producer` above.
struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}
921
922impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
923    type Item = &'data mut T;
924    type IntoIter = ::std::slice::IterMut<'data, T>;
925
926    fn into_iter(self) -> Self::IntoIter {
927        self.slice.iter_mut()
928    }
929
930    fn split_at(self, index: usize) -> (Self, Self) {
931        let (left, right) = self.slice.split_at_mut(index);
932        (
933            IterMutProducer { slice: left },
934            IterMutProducer { slice: right },
935        )
936    }
937}
938
/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    // The slice to split; elements matching the separator are dropped.
    slice: &'data [T],
    // Predicate identifying separator elements.
    separator: P,
}
944
945impl<T, P: Clone> Clone for Split<'_, T, P> {
946    fn clone(&self) -> Self {
947        Split {
948            separator: self.separator.clone(),
949            ..*self
950        }
951    }
952}
953
954impl<T: Debug, P> Debug for Split<'_, T, P> {
955    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
956        f.debug_struct("Split").field("slice", &self.slice).finish()
957    }
958}
959
impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Split points depend on element values, so the number of pieces is
        // not known up front; drive through the unindexed bridge.
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
975
/// Parallel iterator over slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusive<'data, T, P> {
    // The slice to split; matched separators stay at the end of each piece.
    slice: &'data [T],
    // Predicate identifying separator elements.
    separator: P,
}
982
983impl<T, P: Clone> Clone for SplitInclusive<'_, T, P> {
984    fn clone(&self) -> Self {
985        SplitInclusive {
986            separator: self.separator.clone(),
987            ..*self
988        }
989    }
990}
991
992impl<T: Debug, P> Debug for SplitInclusive<'_, T, P> {
993    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
994        f.debug_struct("SplitInclusive")
995            .field("slice", &self.slice)
996            .finish()
997    }
998}
999
impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Split points depend on element values, so the number of pieces is
        // not known up front; drive through the unindexed bridge.
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
1015
1016/// Implement support for `SplitProducer`.
1017impl<T, P> Fissile<P> for &[T]
1018where
1019    P: Fn(&T) -> bool,
1020{
1021    fn length(&self) -> usize {
1022        self.len()
1023    }
1024
1025    fn midpoint(&self, end: usize) -> usize {
1026        end / 2
1027    }
1028
1029    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1030        self[start..end].iter().position(separator)
1031    }
1032
1033    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1034        self[..end].iter().rposition(separator)
1035    }
1036
1037    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1038        if INCL {
1039            // include the separator in the left side
1040            self.split_at(index + 1)
1041        } else {
1042            let (left, right) = self.split_at(index);
1043            (left, &right[1..]) // skip the separator
1044        }
1045    }
1046
1047    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1048    where
1049        F: Folder<Self>,
1050        Self: Send,
1051    {
1052        if INCL {
1053            debug_assert!(!skip_last);
1054            folder.consume_iter(self.split_inclusive(separator))
1055        } else {
1056            let mut split = self.split(separator);
1057            if skip_last {
1058                split.next_back();
1059            }
1060            folder.consume_iter(split)
1061        }
1062    }
1063}
1064
/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    // The slice to split; elements matching the separator are dropped.
    slice: &'data mut [T],
    // Predicate identifying separator elements.
    separator: P,
}
1070
1071impl<T: Debug, P> Debug for SplitMut<'_, T, P> {
1072    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1073        f.debug_struct("SplitMut")
1074            .field("slice", &self.slice)
1075            .finish()
1076    }
1077}
1078
impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Split points depend on element values, so the number of pieces is
        // not known up front; drive through the unindexed bridge.
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
1094
/// Parallel iterator over mutable slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusiveMut<'data, T, P> {
    // The slice to split; matched separators stay at the end of each piece.
    slice: &'data mut [T],
    // Predicate identifying separator elements.
    separator: P,
}
1101
1102impl<T: Debug, P> Debug for SplitInclusiveMut<'_, T, P> {
1103    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1104        f.debug_struct("SplitInclusiveMut")
1105            .field("slice", &self.slice)
1106            .finish()
1107    }
1108}
1109
impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // Split points depend on element values, so the number of pieces is
        // not known up front; drive through the unindexed bridge.
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}
1125
1126/// Implement support for `SplitProducer`.
1127impl<T, P> Fissile<P> for &mut [T]
1128where
1129    P: Fn(&T) -> bool,
1130{
1131    fn length(&self) -> usize {
1132        self.len()
1133    }
1134
1135    fn midpoint(&self, end: usize) -> usize {
1136        end / 2
1137    }
1138
1139    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1140        self[start..end].iter().position(separator)
1141    }
1142
1143    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1144        self[..end].iter().rposition(separator)
1145    }
1146
1147    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1148        if INCL {
1149            // include the separator in the left side
1150            self.split_at_mut(index + 1)
1151        } else {
1152            let (left, right) = self.split_at_mut(index);
1153            (left, &mut right[1..]) // skip the separator
1154        }
1155    }
1156
1157    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1158    where
1159        F: Folder<Self>,
1160        Self: Send,
1161    {
1162        if INCL {
1163            debug_assert!(!skip_last);
1164            folder.consume_iter(self.split_inclusive_mut(separator))
1165        } else {
1166            let mut split = self.split_mut(separator);
1167            if skip_last {
1168                split.next_back();
1169            }
1170            folder.consume_iter(split)
1171        }
1172    }
1173}