1use std::fmt::{self, Debug};
9
10pub const BLOOM_HASH_MASK: u32 = 0x00ffffff;
13const KEY_SIZE: usize = 12;
14
15const ARRAY_SIZE: usize = 1 << KEY_SIZE;
16const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
17
18pub type BloomFilter = CountingBloomFilter<BloomStorageU8>;
20
21#[derive(Clone, Default)]
68pub struct CountingBloomFilter<S>
69where
70 S: BloomStorage,
71{
72 storage: S,
73}
74
75impl<S> CountingBloomFilter<S>
76where
77 S: BloomStorage,
78{
79 #[inline]
81 pub fn new() -> Self {
82 Default::default()
83 }
84
85 #[inline]
86 pub fn clear(&mut self) {
87 self.storage = Default::default();
88 }
89
90 #[cfg(debug_assertions)]
93 pub fn is_zeroed(&self) -> bool {
94 self.storage.is_zeroed()
95 }
96
97 #[cfg(not(debug_assertions))]
98 pub fn is_zeroed(&self) -> bool {
99 unreachable!()
100 }
101
102 #[inline]
104 pub fn insert_hash(&mut self, hash: u32) {
105 self.storage.adjust_first_slot(hash, true);
106 self.storage.adjust_second_slot(hash, true);
107 }
108
109 #[inline]
111 pub fn remove_hash(&mut self, hash: u32) {
112 self.storage.adjust_first_slot(hash, false);
113 self.storage.adjust_second_slot(hash, false);
114 }
115
116 #[inline]
120 pub fn might_contain_hash(&self, hash: u32) -> bool {
121 !self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash)
122 }
123}
124
125impl<S> Debug for CountingBloomFilter<S>
126where
127 S: BloomStorage,
128{
129 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
130 let mut slots_used = 0;
131 for i in 0..ARRAY_SIZE {
132 if !self.storage.slot_is_empty(i) {
133 slots_used += 1;
134 }
135 }
136 write!(f, "BloomFilter({}/{})", slots_used, ARRAY_SIZE)
137 }
138}
139
140pub trait BloomStorage: Clone + Default {
141 fn slot_is_empty(&self, index: usize) -> bool;
142 fn adjust_slot(&mut self, index: usize, increment: bool);
143 fn is_zeroed(&self) -> bool;
144
145 #[inline]
146 fn first_slot_is_empty(&self, hash: u32) -> bool {
147 self.slot_is_empty(Self::first_slot_index(hash))
148 }
149
150 #[inline]
151 fn second_slot_is_empty(&self, hash: u32) -> bool {
152 self.slot_is_empty(Self::second_slot_index(hash))
153 }
154
155 #[inline]
156 fn adjust_first_slot(&mut self, hash: u32, increment: bool) {
157 self.adjust_slot(Self::first_slot_index(hash), increment)
158 }
159
160 #[inline]
161 fn adjust_second_slot(&mut self, hash: u32, increment: bool) {
162 self.adjust_slot(Self::second_slot_index(hash), increment)
163 }
164
165 #[inline]
166 fn first_slot_index(hash: u32) -> usize {
167 hash1(hash) as usize
168 }
169
170 #[inline]
171 fn second_slot_index(hash: u32) -> usize {
172 hash2(hash) as usize
173 }
174}
175
176pub struct BloomStorageU8 {
178 counters: [u8; ARRAY_SIZE],
179}
180
181impl BloomStorage for BloomStorageU8 {
182 #[inline]
183 fn adjust_slot(&mut self, index: usize, increment: bool) {
184 let slot = &mut self.counters[index];
185 if *slot != 0xff {
186 if increment {
188 *slot += 1;
189 } else {
190 *slot -= 1;
191 }
192 }
193 }
194
195 #[inline]
196 fn slot_is_empty(&self, index: usize) -> bool {
197 self.counters[index] == 0
198 }
199
200 #[inline]
201 fn is_zeroed(&self) -> bool {
202 self.counters.iter().all(|x| *x == 0)
203 }
204}
205
206impl Default for BloomStorageU8 {
207 fn default() -> Self {
208 BloomStorageU8 {
209 counters: [0; ARRAY_SIZE],
210 }
211 }
212}
213
214impl Clone for BloomStorageU8 {
215 fn clone(&self) -> Self {
216 BloomStorageU8 {
217 counters: self.counters,
218 }
219 }
220}
221
222pub struct BloomStorageBool {
224 counters: [u8; ARRAY_SIZE / 8],
225}
226
227impl BloomStorage for BloomStorageBool {
228 #[inline]
229 fn adjust_slot(&mut self, index: usize, increment: bool) {
230 let bit = 1 << (index % 8);
231 let byte = &mut self.counters[index / 8];
232
233 assert!(
237 increment || (*byte & bit) != 0,
238 "should not decrement if slot is already false"
239 );
240
241 if increment {
242 *byte |= bit;
243 }
244 }
245
246 #[inline]
247 fn slot_is_empty(&self, index: usize) -> bool {
248 let bit = 1 << (index % 8);
249 (self.counters[index / 8] & bit) == 0
250 }
251
252 #[inline]
253 fn is_zeroed(&self) -> bool {
254 self.counters.iter().all(|x| *x == 0)
255 }
256}
257
258impl Default for BloomStorageBool {
259 fn default() -> Self {
260 BloomStorageBool {
261 counters: [0; ARRAY_SIZE / 8],
262 }
263 }
264}
265
266impl Clone for BloomStorageBool {
267 fn clone(&self) -> Self {
268 BloomStorageBool {
269 counters: self.counters,
270 }
271 }
272}
273
274#[inline]
275fn hash1(hash: u32) -> u32 {
276 hash & KEY_MASK
277}
278
279#[inline]
280fn hash2(hash: u32) -> u32 {
281 (hash >> KEY_SIZE) & KEY_MASK
282}
283
284#[test]
285fn create_and_insert_some_stuff() {
286 use fxhash::FxHasher;
287 use std::hash::{Hash, Hasher};
288 use std::mem::transmute;
289
290 fn hash_as_str(i: usize) -> u32 {
291 let mut hasher = FxHasher::default();
292 let s = i.to_string();
293 s.hash(&mut hasher);
294 let hash: u64 = hasher.finish();
295 (hash >> 32) as u32 ^ (hash as u32)
296 }
297
298 let mut bf = BloomFilter::new();
299
300 unsafe {
303 transmute::<[u8; ARRAY_SIZE % 8], [u8; 0]>([]);
304 }
305
306 for i in 0_usize..1000 {
307 bf.insert_hash(hash_as_str(i));
308 }
309
310 for i in 0_usize..1000 {
311 assert!(bf.might_contain_hash(hash_as_str(i)));
312 }
313
314 let false_positives = (1001_usize..2000)
315 .filter(|i| bf.might_contain_hash(hash_as_str(*i)))
316 .count();
317
318 assert!(false_positives < 190, "{} is not < 190", false_positives); for i in 0_usize..100 {
321 bf.remove_hash(hash_as_str(i));
322 }
323
324 for i in 100_usize..1000 {
325 assert!(bf.might_contain_hash(hash_as_str(i)));
326 }
327
328 let false_positives = (0_usize..100)
329 .filter(|i| bf.might_contain_hash(hash_as_str(*i)))
330 .count();
331
332 assert!(false_positives < 20, "{} is not < 20", false_positives); bf.clear();
335
336 for i in 0_usize..2000 {
337 assert!(!bf.might_contain_hash(hash_as_str(i)));
338 }
339}
340
341#[cfg(feature = "bench")]
342#[cfg(test)]
343mod bench {
344 extern crate test;
345 use super::BloomFilter;
346
347 #[derive(Default)]
348 struct HashGenerator(u32);
349
350 impl HashGenerator {
351 fn next(&mut self) -> u32 {
352 self.0 += (65) + (65 << super::KEY_SIZE);
361 self.0
362 }
363 }
364
365 #[bench]
366 fn create_insert_1000_remove_100_lookup_100(b: &mut test::Bencher) {
367 b.iter(|| {
368 let mut gen1 = HashGenerator::default();
369 let mut gen2 = HashGenerator::default();
370 let mut bf = BloomFilter::new();
371 for _ in 0_usize..1000 {
372 bf.insert_hash(gen1.next());
373 }
374 for _ in 0_usize..100 {
375 bf.remove_hash(gen2.next());
376 }
377 for _ in 100_usize..200 {
378 test::black_box(bf.might_contain_hash(gen2.next()));
379 }
380 });
381 }
382
383 #[bench]
384 fn might_contain_10(b: &mut test::Bencher) {
385 let bf = BloomFilter::new();
386 let mut gen = HashGenerator::default();
387 b.iter(|| {
388 for _ in 0..10 {
389 test::black_box(bf.might_contain_hash(gen.next()));
390 }
391 });
392 }
393
394 #[bench]
395 fn clear(b: &mut test::Bencher) {
396 let mut bf = Box::new(BloomFilter::new());
397 b.iter(|| test::black_box(&mut bf).clear());
398 }
399
400 #[bench]
401 fn insert_10(b: &mut test::Bencher) {
402 let mut bf = BloomFilter::new();
403 let mut gen = HashGenerator::default();
404 b.iter(|| {
405 for _ in 0..10 {
406 test::black_box(bf.insert_hash(gen.next()));
407 }
408 });
409 }
410
411 #[bench]
412 fn remove_10(b: &mut test::Bencher) {
413 let mut bf = BloomFilter::new();
414 let mut gen = HashGenerator::default();
415 b.iter(|| {
417 for _ in 0..10 {
418 bf.remove_hash(gen.next())
419 }
420 });
421 }
422}