rayon/
str.rs

1//! Parallel iterator types for [strings]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
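//! Note: [`ParallelString::par_split()`], [`ParallelString::par_split_inclusive()`],
//! [`par_split_terminator()`], [`ParallelString::par_matches()`], and
//! [`ParallelString::par_match_indices()`] reference a `Pattern` trait which is
//! not visible outside this crate.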
8//! This trait is intentionally kept private, for use only by Rayon itself.
9//! It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
10//! and any function or closure `F: Fn(char) -> bool + Sync + Send`.
11//!
12//! [`par_split_terminator()`]: ParallelString::par_split_terminator()
13//! [strings]: std::str
14
15use crate::iter::plumbing::*;
16use crate::iter::*;
17use crate::split_producer::*;
18
/// Reports whether `b` can be the first byte of a UTF-8 encoded character.
/// (same test as the one inside `str::is_char_boundary`)
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Continuation bytes have the form 0b10xx_xxxx, i.e. 0x80..=0xBF.
    // Anything else (ASCII, or a multi-byte lead byte) starts a character.
    !matches!(b, 0x80..=0xBF)
}
26
27/// Find the index of a character boundary near the midpoint.
28#[inline]
29fn find_char_midpoint(chars: &str) -> usize {
30    let mid = chars.len() / 2;
31
32    // We want to split near the midpoint, but we need to find an actual
33    // character boundary.  So we look at the raw bytes, first scanning
34    // forward from the midpoint for a boundary, then trying backward.
35    // TODO (MSRV 1.91): use `str::ceil_char_boundary`, else `floor_...`.
36    let (left, right) = chars.as_bytes().split_at(mid);
37    match right.iter().copied().position(is_char_boundary) {
38        Some(i) => mid + i,
39        None => left
40            .iter()
41            .copied()
42            .rposition(is_char_boundary)
43            .unwrap_or(0),
44    }
45}
46
47/// Try to split a string near the midpoint.
48#[inline]
49fn split(chars: &str) -> Option<(&str, &str)> {
50    let index = find_char_midpoint(chars);
51    if index > 0 {
52        Some(chars.split_at(index))
53    } else {
54        None
55    }
56}
57
58/// Parallel extensions for strings.
59pub trait ParallelString {
60    /// Returns a plain string slice, which is used to implement the rest of
61    /// the parallel methods.
62    fn as_parallel_string(&self) -> &str;
63
64    /// Returns a parallel iterator over the characters of a string.
65    ///
66    /// # Examples
67    ///
68    /// ```
69    /// use rayon::prelude::*;
70    /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
71    /// assert_eq!(Some('o'), max);
72    /// ```
73    fn par_chars(&self) -> Chars<'_> {
74        Chars {
75            chars: self.as_parallel_string(),
76        }
77    }
78
79    /// Returns a parallel iterator over the characters of a string, with their positions.
80    ///
81    /// # Examples
82    ///
83    /// ```
84    /// use rayon::prelude::*;
85    /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
86    /// assert_eq!(Some((1, 'e')), min);
87    /// ```
88    fn par_char_indices(&self) -> CharIndices<'_> {
89        CharIndices {
90            chars: self.as_parallel_string(),
91        }
92    }
93
94    /// Returns a parallel iterator over the bytes of a string.
95    ///
96    /// Note that multi-byte sequences (for code points greater than `U+007F`)
97    /// are produced as separate items, but will not be split across threads.
98    /// If you would prefer an indexed iterator without that guarantee, consider
99    /// `string.as_bytes().par_iter().copied()` instead.
100    ///
101    /// # Examples
102    ///
103    /// ```
104    /// use rayon::prelude::*;
105    /// let max = "hello".par_bytes().max();
106    /// assert_eq!(Some(b'o'), max);
107    /// ```
108    fn par_bytes(&self) -> Bytes<'_> {
109        Bytes {
110            chars: self.as_parallel_string(),
111        }
112    }
113
114    /// Returns a parallel iterator over a string encoded as UTF-16.
115    ///
116    /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
117    /// produced as separate items, but will not be split across threads.
118    ///
119    /// # Examples
120    ///
121    /// ```
122    /// use rayon::prelude::*;
123    ///
124    /// let max = "hello".par_encode_utf16().max();
125    /// assert_eq!(Some(b'o' as u16), max);
126    ///
127    /// let text = "Zażółć gęślą jaźń";
128    /// let utf8_len = text.len();
129    /// let utf16_len = text.par_encode_utf16().count();
130    /// assert!(utf16_len <= utf8_len);
131    /// ```
132    fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
133        EncodeUtf16 {
134            chars: self.as_parallel_string(),
135        }
136    }
137
138    /// Returns a parallel iterator over substrings separated by a
139    /// given character or predicate, similar to `str::split`.
140    ///
141    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
142    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
143    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// use rayon::prelude::*;
149    /// let total = "1, 2, buckle, 3, 4, door"
150    ///    .par_split(',')
151    ///    .filter_map(|s| s.trim().parse::<i32>().ok())
152    ///    .sum();
153    /// assert_eq!(10, total);
154    /// ```
155    fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
156        Split::new(self.as_parallel_string(), separator)
157    }
158
159    /// Returns a parallel iterator over substrings separated by a
160    /// given character or predicate, keeping the matched part as a terminator
161    /// of the substring similar to `str::split_inclusive`.
162    ///
163    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
164    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
165    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// use rayon::prelude::*;
171    /// let lines: Vec<_> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
172    ///    .par_split_inclusive('\n')
173    ///    .collect();
174    /// assert_eq!(lines, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
175    /// ```
176    fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
177        SplitInclusive::new(self.as_parallel_string(), separator)
178    }
179
180    /// Returns a parallel iterator over substrings terminated by a
181    /// given character or predicate, similar to `str::split_terminator`.
182    /// It's equivalent to `par_split`, except it doesn't produce an empty
183    /// substring after a trailing terminator.
184    ///
185    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
186    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
187    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
188    ///
189    /// # Examples
190    ///
191    /// ```
192    /// use rayon::prelude::*;
193    /// let parts: Vec<_> = "((1 + 3) * 2)"
194    ///     .par_split_terminator(|c| c == '(' || c == ')')
195    ///     .collect();
196    /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
197    /// ```
198    fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
199        SplitTerminator::new(self.as_parallel_string(), terminator)
200    }
201
202    /// Returns a parallel iterator over the lines of a string, ending with an
203    /// optional carriage return and with a newline (`\r\n` or just `\n`).
204    /// The final line ending is optional, and line endings are not included in
205    /// the output strings.
206    ///
207    /// # Examples
208    ///
209    /// ```
210    /// use rayon::prelude::*;
211    /// let lengths: Vec<_> = "hello world\nfizbuzz"
212    ///     .par_lines()
213    ///     .map(|l| l.len())
214    ///     .collect();
215    /// assert_eq!(vec![11, 7], lengths);
216    /// ```
217    fn par_lines(&self) -> Lines<'_> {
218        Lines(self.as_parallel_string())
219    }
220
221    /// Returns a parallel iterator over the sub-slices of a string that are
222    /// separated by any amount of whitespace.
223    ///
224    /// As with `str::split_whitespace`, 'whitespace' is defined according to
225    /// the terms of the Unicode Derived Core Property `White_Space`.
226    /// If you only want to split on ASCII whitespace instead, use
227    /// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
228    ///
229    /// # Examples
230    ///
231    /// ```
232    /// use rayon::prelude::*;
233    /// let longest = "which is the longest word?"
234    ///     .par_split_whitespace()
235    ///     .max_by_key(|word| word.len());
236    /// assert_eq!(Some("longest"), longest);
237    /// ```
238    ///
239    /// All kinds of whitespace are considered:
240    ///
241    /// ```
242    /// use rayon::prelude::*;
243    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
244    ///     .par_split_whitespace()
245    ///     .collect();
246    /// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
247    /// ```
248    ///
249    /// If the string is empty or all whitespace, the iterator yields no string slices:
250    ///
251    /// ```
252    /// use rayon::prelude::*;
253    /// assert_eq!("".par_split_whitespace().count(), 0);
254    /// assert_eq!("   ".par_split_whitespace().count(), 0);
255    /// ```
256    fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
257        SplitWhitespace(self.as_parallel_string())
258    }
259
260    /// Returns a parallel iterator over the sub-slices of a string that are
261    /// separated by any amount of ASCII whitespace.
262    ///
263    /// To split by Unicode `White_Space` instead, use
264    /// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
265    ///
266    /// # Examples
267    ///
268    /// ```
269    /// use rayon::prelude::*;
270    /// let longest = "which is the longest word?"
271    ///     .par_split_ascii_whitespace()
272    ///     .max_by_key(|word| word.len());
273    /// assert_eq!(Some("longest"), longest);
274    /// ```
275    ///
276    /// All kinds of ASCII whitespace are considered, but not Unicode `White_Space`:
277    ///
278    /// ```
279    /// use rayon::prelude::*;
280    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
281    ///     .par_split_ascii_whitespace()
282    ///     .collect();
283    /// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
284    /// ```
285    ///
286    /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
287    ///
288    /// ```
289    /// use rayon::prelude::*;
290    /// assert_eq!("".par_split_whitespace().count(), 0);
291    /// assert_eq!("   ".par_split_whitespace().count(), 0);
292    /// ```
293    fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
294        SplitAsciiWhitespace(self.as_parallel_string())
295    }
296
297    /// Returns a parallel iterator over substrings that match a
298    /// given character or predicate, similar to `str::matches`.
299    ///
300    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
301    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
302    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
303    ///
304    /// # Examples
305    ///
306    /// ```
307    /// use rayon::prelude::*;
308    /// let total = "1, 2, buckle, 3, 4, door"
309    ///    .par_matches(char::is_numeric)
310    ///    .map(|s| s.parse::<i32>().expect("digit"))
311    ///    .sum();
312    /// assert_eq!(10, total);
313    /// ```
314    fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
315        Matches {
316            chars: self.as_parallel_string(),
317            pattern,
318        }
319    }
320
321    /// Returns a parallel iterator over substrings that match a given character
322    /// or predicate, with their positions, similar to `str::match_indices`.
323    ///
324    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
325    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
326    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
327    ///
328    /// # Examples
329    ///
330    /// ```
331    /// use rayon::prelude::*;
332    /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
333    ///    .par_match_indices(char::is_numeric)
334    ///    .collect();
335    /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
336    /// ```
337    fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
338        MatchIndices {
339            chars: self.as_parallel_string(),
340            pattern,
341        }
342    }
343}
344
345impl ParallelString for str {
346    #[inline]
347    fn as_parallel_string(&self) -> &str {
348        self
349    }
350}
351
352// /////////////////////////////////////////////////////////////////////////
353
354/// We hide the `Pattern` trait in a private module, as its API is not meant
355/// for general consumption.  If we could have privacy on trait items, then it
356/// would be nicer to have its basic existence and implementors public while
357/// keeping all of the methods private.
358mod private {
359    use crate::iter::plumbing::Folder;
360
361    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
362    /// `std::str::pattern::{Pattern, Searcher}`.
363    ///
364    /// Implementing this trait is not permitted outside of `rayon`.
365    pub trait Pattern: Sized + Sync + Send {
366        private_decl! {}
367        fn find_in(&self, haystack: &str) -> Option<usize>;
368        fn rfind_in(&self, haystack: &str) -> Option<usize>;
369        fn is_suffix_of(&self, haystack: &str) -> bool;
370        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
371        where
372            F: Folder<&'ch str>;
373        fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
374        where
375            F: Folder<&'ch str>;
376        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
377        where
378            F: Folder<&'ch str>;
379        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
380        where
381            F: Folder<(usize, &'ch str)>;
382    }
383}
384use self::private::Pattern;
385
/// Builds a closure that adds `base` to the index half of an `(index, value)` pair,
/// leaving the value untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair: (usize, T)| {
        let (index, value) = pair;
        (base + index, value)
    }
}
390
// Expands to the full set of `Pattern` methods for one implementor.
// `$self` is the receiver identifier supplied by the caller, and `$pattern`
// is the expression passed to the corresponding `str` method in each body.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, haystack: &str) -> Option<usize> {
            haystack.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, haystack: &str) -> Option<usize> {
            haystack.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, haystack: &str) -> bool {
            haystack.ends_with($pattern)
        }

        fn fold_splits<'ch, F: Folder<&'ch str>>(
            &$self,
            haystack: &'ch str,
            folder: F,
            skip_last: bool,
        ) -> F {
            let mut pieces = haystack.split($pattern);
            if skip_last {
                // Discard the final piece before anything is consumed.
                let _ = pieces.next_back();
            }
            folder.consume_iter(pieces)
        }

        fn fold_inclusive_splits<'ch, F: Folder<&'ch str>>(
            &$self,
            haystack: &'ch str,
            folder: F,
        ) -> F {
            let pieces = haystack.split_inclusive($pattern);
            folder.consume_iter(pieces)
        }

        fn fold_matches<'ch, F: Folder<&'ch str>>(&$self, haystack: &'ch str, folder: F) -> F {
            let found = haystack.matches($pattern);
            folder.consume_iter(found)
        }

        fn fold_match_indices<'ch, F: Folder<(usize, &'ch str)>>(
            &$self,
            haystack: &'ch str,
            folder: F,
            base: usize,
        ) -> F {
            // `match_indices` reports positions relative to `haystack`;
            // shift them by `base` before handing them on.
            let found = haystack.match_indices($pattern).map(offset(base));
            folder.consume_iter(found)
        }
    };
}
443
444impl Pattern for char {
445    impl_pattern!(&self => *self);
446}
447
448impl Pattern for &[char] {
449    impl_pattern!(&self => *self);
450}
451
452impl<const N: usize> Pattern for [char; N] {
453    impl_pattern!(&self => *self);
454}
455
456impl<const N: usize> Pattern for &[char; N] {
457    impl_pattern!(&self => *self);
458}
459
460impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
461    impl_pattern!(&self => self);
462}
463
464// /////////////////////////////////////////////////////////////////////////
465
/// Parallel iterator over the characters of a string
///
/// Created by [`ParallelString::par_chars()`].
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    // The string whose characters are yielded.
    chars: &'ch str,
}

// Splittable producer behind `Chars`: each instance covers one sub-slice of
// the original string.
struct CharsProducer<'ch> {
    chars: &'ch str,
}
475
476impl<'ch> ParallelIterator for Chars<'ch> {
477    type Item = char;
478
479    fn drive_unindexed<C>(self, consumer: C) -> C::Result
480    where
481        C: UnindexedConsumer<Self::Item>,
482    {
483        bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
484    }
485}
486
487impl<'ch> UnindexedProducer for CharsProducer<'ch> {
488    type Item = char;
489
490    fn split(self) -> (Self, Option<Self>) {
491        match split(self.chars) {
492            Some((left, right)) => (
493                CharsProducer { chars: left },
494                Some(CharsProducer { chars: right }),
495            ),
496            None => (self, None),
497        }
498    }
499
500    fn fold_with<F>(self, folder: F) -> F
501    where
502        F: Folder<Self::Item>,
503    {
504        folder.consume_iter(self.chars.chars())
505    }
506}
507
508// /////////////////////////////////////////////////////////////////////////
509
/// Parallel iterator over the characters of a string, with their positions
///
/// Created by [`ParallelString::par_char_indices()`].
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    // The string whose characters are yielded.
    chars: &'ch str,
}

// Splittable producer behind `CharIndices`.
struct CharIndicesProducer<'ch> {
    // Base added to every position yielded from `chars`: 0 for the root
    // producer, advanced by the left half's byte length on each split.
    index: usize,
    chars: &'ch str,
}
520
521impl<'ch> ParallelIterator for CharIndices<'ch> {
522    type Item = (usize, char);
523
524    fn drive_unindexed<C>(self, consumer: C) -> C::Result
525    where
526        C: UnindexedConsumer<Self::Item>,
527    {
528        let producer = CharIndicesProducer {
529            index: 0,
530            chars: self.chars,
531        };
532        bridge_unindexed(producer, consumer)
533    }
534}
535
536impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
537    type Item = (usize, char);
538
539    fn split(self) -> (Self, Option<Self>) {
540        match split(self.chars) {
541            Some((left, right)) => (
542                CharIndicesProducer {
543                    chars: left,
544                    ..self
545                },
546                Some(CharIndicesProducer {
547                    chars: right,
548                    index: self.index + left.len(),
549                }),
550            ),
551            None => (self, None),
552        }
553    }
554
555    fn fold_with<F>(self, folder: F) -> F
556    where
557        F: Folder<Self::Item>,
558    {
559        let base = self.index;
560        folder.consume_iter(self.chars.char_indices().map(offset(base)))
561    }
562}
563
564// /////////////////////////////////////////////////////////////////////////
565
/// Parallel iterator over the bytes of a string
///
/// Created by [`ParallelString::par_bytes()`].
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    // The string whose bytes are yielded.
    chars: &'ch str,
}

// Splittable producer behind `Bytes`. It holds a `&str` rather than a byte
// slice and is split with the character-boundary `split` helper.
struct BytesProducer<'ch> {
    chars: &'ch str,
}
575
576impl<'ch> ParallelIterator for Bytes<'ch> {
577    type Item = u8;
578
579    fn drive_unindexed<C>(self, consumer: C) -> C::Result
580    where
581        C: UnindexedConsumer<Self::Item>,
582    {
583        bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
584    }
585}
586
587impl<'ch> UnindexedProducer for BytesProducer<'ch> {
588    type Item = u8;
589
590    fn split(self) -> (Self, Option<Self>) {
591        match split(self.chars) {
592            Some((left, right)) => (
593                BytesProducer { chars: left },
594                Some(BytesProducer { chars: right }),
595            ),
596            None => (self, None),
597        }
598    }
599
600    fn fold_with<F>(self, folder: F) -> F
601    where
602        F: Folder<Self::Item>,
603    {
604        folder.consume_iter(self.chars.bytes())
605    }
606}
607
608// /////////////////////////////////////////////////////////////////////////
609
/// Parallel iterator over a string encoded as UTF-16
///
/// Created by [`ParallelString::par_encode_utf16()`].
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    // The string that is re-encoded as UTF-16 code units.
    chars: &'ch str,
}

// Splittable producer behind `EncodeUtf16`: each instance covers one
// sub-slice of the original string.
struct EncodeUtf16Producer<'ch> {
    chars: &'ch str,
}
619
620impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
621    type Item = u16;
622
623    fn drive_unindexed<C>(self, consumer: C) -> C::Result
624    where
625        C: UnindexedConsumer<Self::Item>,
626    {
627        bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
628    }
629}
630
631impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
632    type Item = u16;
633
634    fn split(self) -> (Self, Option<Self>) {
635        match split(self.chars) {
636            Some((left, right)) => (
637                EncodeUtf16Producer { chars: left },
638                Some(EncodeUtf16Producer { chars: right }),
639            ),
640            None => (self, None),
641        }
642    }
643
644    fn fold_with<F>(self, folder: F) -> F
645    where
646        F: Folder<Self::Item>,
647    {
648        folder.consume_iter(self.chars.encode_utf16())
649    }
650}
651
652// /////////////////////////////////////////////////////////////////////////
653
654/// Parallel iterator over substrings separated by a pattern
655#[derive(Debug, Clone)]
656pub struct Split<'ch, P: Pattern> {
657    chars: &'ch str,
658    separator: P,
659}
660
661impl<'ch, P: Pattern> Split<'ch, P> {
662    fn new(chars: &'ch str, separator: P) -> Self {
663        Split { chars, separator }
664    }
665}
666
667impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
668    type Item = &'ch str;
669
670    fn drive_unindexed<C>(self, consumer: C) -> C::Result
671    where
672        C: UnindexedConsumer<Self::Item>,
673    {
674        let producer = SplitProducer::new(self.chars, &self.separator);
675        bridge_unindexed(producer, consumer)
676    }
677}
678
679/// Implement support for `SplitProducer`.
680impl<P: Pattern> Fissile<P> for &str {
681    fn length(&self) -> usize {
682        self.len()
683    }
684
685    fn midpoint(&self, end: usize) -> usize {
686        // First find a suitable UTF-8 boundary.
687        find_char_midpoint(&self[..end])
688    }
689
690    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
691        separator.find_in(&self[start..end])
692    }
693
694    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
695        separator.rfind_in(&self[..end])
696    }
697
698    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
699        if INCL {
700            // include the separator in the left side
701            let separator = self[index..].chars().next().unwrap();
702            self.split_at(index + separator.len_utf8())
703        } else {
704            let (left, right) = self.split_at(index);
705            let mut right_iter = right.chars();
706            right_iter.next(); // skip the separator
707            (left, right_iter.as_str())
708        }
709    }
710
711    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
712    where
713        F: Folder<Self>,
714    {
715        if INCL {
716            debug_assert!(!skip_last);
717            separator.fold_inclusive_splits(self, folder)
718        } else {
719            separator.fold_splits(self, folder, skip_last)
720        }
721    }
722}
723
724// /////////////////////////////////////////////////////////////////////////
725
726/// Parallel iterator over substrings separated by a pattern
727#[derive(Debug, Clone)]
728pub struct SplitInclusive<'ch, P: Pattern> {
729    chars: &'ch str,
730    separator: P,
731}
732
733impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
734    fn new(chars: &'ch str, separator: P) -> Self {
735        SplitInclusive { chars, separator }
736    }
737}
738
739impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
740    type Item = &'ch str;
741
742    fn drive_unindexed<C>(self, consumer: C) -> C::Result
743    where
744        C: UnindexedConsumer<Self::Item>,
745    {
746        let producer = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
747        bridge_unindexed(producer, consumer)
748    }
749}
750
751// /////////////////////////////////////////////////////////////////////////
752
753/// Parallel iterator over substrings separated by a terminator pattern
754#[derive(Debug, Clone)]
755pub struct SplitTerminator<'ch, P: Pattern> {
756    chars: &'ch str,
757    terminator: P,
758}
759
760struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
761    splitter: SplitProducer<'sep, P, &'ch str>,
762    skip_last: bool,
763}
764
765impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
766    fn new(chars: &'ch str, terminator: P) -> Self {
767        SplitTerminator { chars, terminator }
768    }
769}
770
771impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
772    fn new(chars: &'ch str, terminator: &'sep P) -> Self {
773        SplitTerminatorProducer {
774            splitter: SplitProducer::new(chars, terminator),
775            skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
776        }
777    }
778}
779
780impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
781    type Item = &'ch str;
782
783    fn drive_unindexed<C>(self, consumer: C) -> C::Result
784    where
785        C: UnindexedConsumer<Self::Item>,
786    {
787        let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
788        bridge_unindexed(producer, consumer)
789    }
790}
791
792impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
793    type Item = &'ch str;
794
795    fn split(mut self) -> (Self, Option<Self>) {
796        let (left, right) = self.splitter.split();
797        self.splitter = left;
798        let right = right.map(|right| {
799            let skip_last = self.skip_last;
800            self.skip_last = false;
801            SplitTerminatorProducer {
802                splitter: right,
803                skip_last,
804            }
805        });
806        (self, right)
807    }
808
809    fn fold_with<F>(self, folder: F) -> F
810    where
811        F: Folder<Self::Item>,
812    {
813        self.splitter.fold_with(folder, self.skip_last)
814    }
815}
816
817// /////////////////////////////////////////////////////////////////////////
818
/// Parallel iterator over lines in a string
///
/// Created by [`ParallelString::par_lines()`]. The wrapped slice is the
/// string whose lines are yielded.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
822
/// Removes one trailing `'\r'` from `line`, if present.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    match line.strip_suffix('\r') {
        Some(trimmed) => trimmed,
        None => line,
    }
}
827
828impl<'ch> ParallelIterator for Lines<'ch> {
829    type Item = &'ch str;
830
831    fn drive_unindexed<C>(self, consumer: C) -> C::Result
832    where
833        C: UnindexedConsumer<Self::Item>,
834    {
835        self.0
836            .par_split_terminator('\n')
837            .map(no_carriage_return)
838            .drive_unindexed(consumer)
839    }
840}
841
842// /////////////////////////////////////////////////////////////////////////
843
/// Parallel iterator over substrings separated by whitespace
///
/// Created by [`ParallelString::par_split_whitespace()`]. The wrapped slice
/// is the string being split.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
847
/// Filter predicate that keeps only non-empty string slices.
#[inline]
fn not_empty(s: &&str) -> bool {
    let text: &str = s;
    !text.is_empty()
}
852
853impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
854    type Item = &'ch str;
855
856    fn drive_unindexed<C>(self, consumer: C) -> C::Result
857    where
858        C: UnindexedConsumer<Self::Item>,
859    {
860        self.0
861            .par_split(char::is_whitespace)
862            .filter(not_empty)
863            .drive_unindexed(consumer)
864    }
865}
866
867// /////////////////////////////////////////////////////////////////////////
868
/// Parallel iterator over substrings separated by ASCII whitespace
///
/// Created by [`ParallelString::par_split_ascii_whitespace()`]. The wrapped
/// slice is the string being split.
#[derive(Debug, Clone)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
872
/// By-value adapter for `char::is_ascii_whitespace`, which takes `&self`.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    char::is_ascii_whitespace(&c)
}
877
878impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
879    type Item = &'ch str;
880
881    fn drive_unindexed<C>(self, consumer: C) -> C::Result
882    where
883        C: UnindexedConsumer<Self::Item>,
884    {
885        self.0
886            .par_split(is_ascii_whitespace)
887            .filter(not_empty)
888            .drive_unindexed(consumer)
889    }
890}
891
892// /////////////////////////////////////////////////////////////////////////
893
/// Parallel iterator over substrings that match a pattern
///
/// Created by [`ParallelString::par_matches()`].
#[derive(Debug, Clone)]
pub struct Matches<'ch, P: Pattern> {
    // The string being searched.
    chars: &'ch str,
    // The pattern whose matches are yielded.
    pattern: P,
}

// Splittable producer behind `Matches`: a sub-slice of the original string
// plus a borrow of the pattern owned by the `Matches` being driven.
struct MatchesProducer<'ch, 'pat, P: Pattern> {
    chars: &'ch str,
    pattern: &'pat P,
}
905
906impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
907    type Item = &'ch str;
908
909    fn drive_unindexed<C>(self, consumer: C) -> C::Result
910    where
911        C: UnindexedConsumer<Self::Item>,
912    {
913        let producer = MatchesProducer {
914            chars: self.chars,
915            pattern: &self.pattern,
916        };
917        bridge_unindexed(producer, consumer)
918    }
919}
920
921impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
922    type Item = &'ch str;
923
924    fn split(self) -> (Self, Option<Self>) {
925        match split(self.chars) {
926            Some((left, right)) => (
927                MatchesProducer {
928                    chars: left,
929                    ..self
930                },
931                Some(MatchesProducer {
932                    chars: right,
933                    ..self
934                }),
935            ),
936            None => (self, None),
937        }
938    }
939
940    fn fold_with<F>(self, folder: F) -> F
941    where
942        F: Folder<Self::Item>,
943    {
944        self.pattern.fold_matches(self.chars, folder)
945    }
946}
947
948// /////////////////////////////////////////////////////////////////////////
949
/// Parallel iterator over substrings that match a pattern, with their positions
///
/// Created by [`ParallelString::par_match_indices()`].
#[derive(Debug, Clone)]
pub struct MatchIndices<'ch, P: Pattern> {
    // The string being searched.
    chars: &'ch str,
    // The pattern whose matches are yielded.
    pattern: P,
}

// Splittable producer behind `MatchIndices`.
struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
    // Base passed along when folding so reported positions are shifted by it:
    // 0 for the root producer, advanced by the left half's byte length on
    // each split.
    index: usize,
    chars: &'ch str,
    pattern: &'pat P,
}
962
963impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
964    type Item = (usize, &'ch str);
965
966    fn drive_unindexed<C>(self, consumer: C) -> C::Result
967    where
968        C: UnindexedConsumer<Self::Item>,
969    {
970        let producer = MatchIndicesProducer {
971            index: 0,
972            chars: self.chars,
973            pattern: &self.pattern,
974        };
975        bridge_unindexed(producer, consumer)
976    }
977}
978
979impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
980    type Item = (usize, &'ch str);
981
982    fn split(self) -> (Self, Option<Self>) {
983        match split(self.chars) {
984            Some((left, right)) => (
985                MatchIndicesProducer {
986                    chars: left,
987                    ..self
988                },
989                Some(MatchIndicesProducer {
990                    chars: right,
991                    index: self.index + left.len(),
992                    ..self
993                }),
994            ),
995            None => (self, None),
996        }
997    }
998
999    fn fold_with<F>(self, folder: F) -> F
1000    where
1001        F: Folder<Self::Item>,
1002    {
1003        self.pattern
1004            .fold_match_indices(self.chars, folder, self.index)
1005    }
1006}