Skip to main content

itertools/
duplicates_impl.rs

1use core::hash::BuildHasher;
2use std::collections::hash_map::RandomState;
3use std::hash::Hash;
4
5mod private {
6    use core::hash::BuildHasher;
7    use std::collections::hash_map::RandomState;
8    use std::collections::HashMap;
9    use std::fmt;
10    use std::hash::Hash;
11
12    #[derive(Clone)]
13    #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
14    pub struct DuplicatesBy<I: Iterator, Key, F, S = RandomState>
15    where
16        S: BuildHasher,
17    {
18        pub(crate) iter: I,
19        pub(crate) meta: Meta<Key, F, S>,
20    }
21
22    impl<I, V, F, S> fmt::Debug for DuplicatesBy<I, V, F, S>
23    where
24        I: Iterator + fmt::Debug,
25        V: fmt::Debug + Hash + Eq,
26        S: BuildHasher,
27    {
28        debug_fmt_fields!(DuplicatesBy, iter, meta.used);
29    }
30
31    impl<I: Iterator, Key: Eq + Hash, F, S: BuildHasher> DuplicatesBy<I, Key, F, S> {
32        pub(crate) fn new(iter: I, key_method: F, hash_builder: S) -> Self {
33            Self {
34                iter,
35                meta: Meta {
36                    used: HashMap::with_hasher(hash_builder),
37                    pending: 0,
38                    key_method,
39                },
40            }
41        }
42    }
43
44    #[derive(Clone)]
45    pub struct Meta<Key, F, S> {
46        used: HashMap<Key, bool, S>,
47        pending: usize,
48        key_method: F,
49    }
50
51    impl<Key, F, S> Meta<Key, F, S>
52    where
53        Key: Eq + Hash,
54        S: BuildHasher,
55    {
56        /// Takes an item and returns it back to the caller if it's the second time we see it.
57        /// Otherwise the item is consumed and None is returned
58        #[inline(always)]
59        fn filter<I>(&mut self, item: I) -> Option<I>
60        where
61            F: KeyMethod<Key, I>,
62        {
63            let kv = self.key_method.make(item);
64            match self.used.get_mut(kv.key_ref()) {
65                None => {
66                    self.used.insert(kv.key(), false);
67                    self.pending += 1;
68                    None
69                }
70                Some(true) => None,
71                Some(produced) => {
72                    *produced = true;
73                    self.pending -= 1;
74                    Some(kv.value())
75                }
76            }
77        }
78    }
79
80    impl<I, Key, F, S> Iterator for DuplicatesBy<I, Key, F, S>
81    where
82        I: Iterator,
83        Key: Eq + Hash,
84        F: KeyMethod<Key, I::Item>,
85        S: BuildHasher,
86    {
87        type Item = I::Item;
88
89        fn next(&mut self) -> Option<Self::Item> {
90            let Self { iter, meta } = self;
91            iter.find_map(|v| meta.filter(v))
92        }
93
94        #[inline]
95        fn size_hint(&self) -> (usize, Option<usize>) {
96            let (_, hi) = self.iter.size_hint();
97            let hi = hi.map(|hi| {
98                if hi <= self.meta.pending {
99                    // fewer or equally many iter-remaining elements than pending elements
100                    // => at most, each iter-remaining element is matched
101                    hi
102                } else {
103                    // fewer pending elements than iter-remaining elements
104                    // => at most:
105                    //    * each pending element is matched
106                    //    * the other iter-remaining elements come in pairs
107                    self.meta.pending + (hi - self.meta.pending) / 2
108                }
109            });
110            // The lower bound is always 0 since we might only get unique items from now on
111            (0, hi)
112        }
113    }
114
115    impl<I, Key, F, S> DoubleEndedIterator for DuplicatesBy<I, Key, F, S>
116    where
117        I: DoubleEndedIterator,
118        Key: Eq + Hash,
119        F: KeyMethod<Key, I::Item>,
120        S: BuildHasher,
121    {
122        fn next_back(&mut self) -> Option<Self::Item> {
123            let Self { iter, meta } = self;
124            iter.rev().find_map(|v| meta.filter(v))
125        }
126    }
127
128    /// A keying method for use with `DuplicatesBy`
129    pub trait KeyMethod<K, V> {
130        type Container: KeyXorValue<K, V>;
131
132        fn make(&mut self, value: V) -> Self::Container;
133    }
134
135    /// Apply the identity function to elements before checking them for equality.
136    #[derive(Debug, Clone)]
137    pub struct ById;
138    impl<V> KeyMethod<V, V> for ById {
139        type Container = JustValue<V>;
140
141        fn make(&mut self, v: V) -> Self::Container {
142            JustValue(v)
143        }
144    }
145
146    /// Apply a user-supplied function to elements before checking them for equality.
147    #[derive(Clone)]
148    pub struct ByFn<F>(pub(crate) F);
149    impl<F> fmt::Debug for ByFn<F> {
150        debug_fmt_fields!(ByFn,);
151    }
152    impl<K, V, F> KeyMethod<K, V> for ByFn<F>
153    where
154        F: FnMut(&V) -> K,
155    {
156        type Container = KeyValue<K, V>;
157
158        fn make(&mut self, v: V) -> Self::Container {
159            KeyValue((self.0)(&v), v)
160        }
161    }
162
163    // Implementors of this trait can hold onto a key and a value but only give access to one of them
164    // at a time. This allows the key and the value to be the same value internally
165    pub trait KeyXorValue<K, V> {
166        fn key_ref(&self) -> &K;
167        fn key(self) -> K;
168        fn value(self) -> V;
169    }
170
171    #[derive(Debug)]
172    pub struct KeyValue<K, V>(K, V);
173    impl<K, V> KeyXorValue<K, V> for KeyValue<K, V> {
174        fn key_ref(&self) -> &K {
175            &self.0
176        }
177        fn key(self) -> K {
178            self.0
179        }
180        fn value(self) -> V {
181            self.1
182        }
183    }
184
185    #[derive(Debug)]
186    pub struct JustValue<V>(V);
187    impl<V> KeyXorValue<V, V> for JustValue<V> {
188        fn key_ref(&self) -> &V {
189            &self.0
190        }
191        fn key(self) -> V {
192            self.0
193        }
194        fn value(self) -> V {
195            self.0
196        }
197    }
198}
199
200/// An iterator adapter to filter for duplicate elements.
201///
202/// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information.
203pub type DuplicatesBy<I, V, F, S = RandomState> = private::DuplicatesBy<I, V, private::ByFn<F>, S>;
204
205/// Create a new `DuplicatesBy` iterator with a specified hash builder.
206pub fn duplicates_by_with_hasher<I, Key, F, S>(
207    iter: I,
208    f: F,
209    hash_builder: S,
210) -> DuplicatesBy<I, Key, F, S>
211where
212    Key: Eq + Hash,
213    F: FnMut(&I::Item) -> Key,
214    I: Iterator,
215    S: BuildHasher,
216{
217    DuplicatesBy::new(iter, private::ByFn(f), hash_builder)
218}
219
220/// An iterator adapter to filter out duplicate elements.
221///
222/// See [`.duplicates()`](crate::Itertools::duplicates) for more information.
223pub type Duplicates<I, S = RandomState> =
224    private::DuplicatesBy<I, <I as Iterator>::Item, private::ById, S>;
225
226/// Create a new `Duplicates` iterator with a specified hash builder.
227pub fn duplicates_with_hasher<I, S>(iter: I, hash_builder: S) -> Duplicates<I, S>
228where
229    I: Iterator,
230    I::Item: Eq + Hash,
231    S: BuildHasher,
232{
233    Duplicates::new(iter, private::ById, hash_builder)
234}