unic_char_range/
iter.rs

// Copyright 2017 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10
11use core::{char, ops};
12
13use crate::{step, CharRange};
14
/// Half-open span of the surrogate code points (`0xD800` up to, but not
/// including, `0xE000`). No `char` can hold a value in this span, so it has to
/// be discounted when measuring a range that straddles it.
const SURROGATE_RANGE: ops::Range<u32> = ops::Range {
    start: 0xD800,
    end: 0xE000,
};
16
/// Iterator yielding the unicode code points of a range, one `char` at a time.
///
/// Obtained through `CharRange::iter`; see `CharRange` for more information.
#[derive(Debug, Clone)]
pub struct CharIter {
    /// Lowest character not yet iterated (inclusive).
    ///
    /// Once this is greater than `high`, iteration is finished.
    low: char,

    /// Highest character not yet iterated (inclusive).
    ///
    /// Once this is less than `low`, iteration is finished.
    high: char,
}
32
33impl From<CharRange> for CharIter {
34    fn from(range: CharRange) -> CharIter {
35        CharIter {
36            low: range.low,
37            high: range.high,
38        }
39    }
40}
41
42impl From<CharIter> for CharRange {
43    fn from(iter: CharIter) -> CharRange {
44        CharRange {
45            low: iter.low,
46            high: iter.high,
47        }
48    }
49}
50
51impl CharIter {
52    #[inline]
53    #[allow(unsafe_code)]
54    // When stepping `self.low` forward would go over `char::MAX`,
55    // Set `self.high` to `'\0'` instead. It will have the same effect --
56    // consuming the last element from the iterator and ending iteration.
57    fn step_forward(&mut self) {
58        if self.low == char::MAX {
59            self.high = '\0'
60        } else {
61            self.low = unsafe { step::forward(self.low) }
62        }
63    }
64
65    #[inline]
66    #[allow(unsafe_code)]
67    // When stepping `self.high` backward would cause underflow,
68    // set `self.low` to `char::MAX` instead. It will have the same effect --
69    // consuming the last element from the iterator and ending iteration.
70    fn step_backward(&mut self) {
71        if self.high == '\0' {
72            self.low = char::MAX;
73        } else {
74            self.high = unsafe { step::backward(self.high) }
75        }
76    }
77
78    #[inline]
79    /// ExactSizeIterator::is_empty() for stable
80    fn is_finished(&self) -> bool {
81        self.low > self.high
82    }
83}
84
85impl Iterator for CharIter {
86    type Item = char;
87
88    #[inline]
89    fn next(&mut self) -> Option<char> {
90        if self.is_finished() {
91            return None;
92        }
93
94        let ch = self.low;
95        self.step_forward();
96        Some(ch)
97    }
98
99    fn size_hint(&self) -> (usize, Option<usize>) {
100        let len = self.len();
101        (len, Some(len))
102    }
103
104    fn last(self) -> Option<char> {
105        if self.is_finished() {
106            None
107        } else {
108            Some(self.high)
109        }
110    }
111
112    fn max(self) -> Option<char> {
113        self.last()
114    }
115
116    fn min(mut self) -> Option<char> {
117        self.next()
118    }
119}
120
121impl DoubleEndedIterator for CharIter {
122    #[inline]
123    fn next_back(&mut self) -> Option<Self::Item> {
124        if self.is_finished() {
125            None
126        } else {
127            let ch = self.high;
128            self.step_backward();
129            Some(ch)
130        }
131    }
132}
133
134impl ExactSizeIterator for CharIter {
135    fn len(&self) -> usize {
136        if self.is_finished() {
137            return 0;
138        }
139        let naive_range = (self.low as u32)..(self.high as u32 + 1);
140        if naive_range.start <= SURROGATE_RANGE.start && SURROGATE_RANGE.end <= naive_range.end {
141            naive_range.len() - SURROGATE_RANGE.len()
142        } else {
143            naive_range.len()
144        }
145    }
146
147    #[cfg(feature = "exact-size-is-empty")]
148    fn is_empty(&self) -> bool {
149        self.is_finished()
150    }
151}