// hashbrown/raw.rs
1use crate::TryReserveError;
2use crate::control::{BitMaskIter, Group, Tag, TagSliceExt};
3use crate::scopeguard::{ScopeGuard, guard};
4use crate::util::{invalid_mut, likely, unlikely};
5use core::array;
6use core::iter::FusedIterator;
7use core::marker::PhantomData;
8use core::mem;
9use core::ptr;
10use core::ptr::NonNull;
11use core::slice;
12use stdalloc::alloc::{Layout, handle_alloc_error};
13
14#[cfg(test)]
15use crate::alloc::AllocError;
16use crate::alloc::{Allocator, Global, do_alloc};
17
/// Distance between two pointers, measured in units of `T`.
///
/// # Safety
///
/// Both pointers must be derived from the same allocation and `to` must not
/// be below `from`; see the contract of [`<*const T>::offset_from`].
#[inline]
unsafe fn offset_from<T>(to: *const T, from: *const T) -> usize {
    // SAFETY: forwarded to the caller; the result is non-negative because
    // `to >= from`, so the cast to `usize` is lossless.
    let elements = unsafe { to.offset_from(from) };
    elements as usize
}
22
/// Whether memory allocation errors should return an error or abort.
#[derive(Copy, Clone)]
enum Fallibility {
    /// Report allocation failures to the caller as a [`TryReserveError`].
    Fallible,
    /// Abort on allocation failure (panic on capacity overflow,
    /// `handle_alloc_error` on allocator failure).
    Infallible,
}
29
impl Fallibility {
    /// Error to return on capacity overflow.
    ///
    /// In the `Infallible` case this never returns: it panics instead.
    #[cfg_attr(feature = "inline-more", inline)]
    fn capacity_overflow(self) -> TryReserveError {
        match self {
            Fallibility::Fallible => TryReserveError::CapacityOverflow,
            Fallibility::Infallible => panic!("Hash table capacity overflow"),
        }
    }

    /// Error to return on allocation error.
    ///
    /// In the `Infallible` case this never returns: `handle_alloc_error`
    /// aborts (or unwinds, depending on configuration).
    #[cfg_attr(feature = "inline-more", inline)]
    fn alloc_err(self, layout: Layout) -> TryReserveError {
        match self {
            Fallibility::Fallible => TryReserveError::AllocError { layout },
            Fallibility::Infallible => handle_alloc_error(layout),
        }
    }
}
49
/// Helper trait exposing compile-time facts about a type, so that e.g.
/// `T::IS_ZERO_SIZED` can be written directly in `if` conditions.
trait SizedTypeProperties: Sized {
    /// `true` if `Self` occupies no memory (a ZST).
    const IS_ZERO_SIZED: bool = mem::size_of::<Self>() == 0;
    /// `true` if dropping `Self` runs any code.
    const NEEDS_DROP: bool = mem::needs_drop::<Self>();
}

// Blanket impl: every sized type gets these constants.
impl<T> SizedTypeProperties for T {}
56
/// Primary hash function, used to select the initial bucket to probe from.
///
/// Returns the full hash on 64-bit platforms; the secondary (control-byte)
/// hash is derived elsewhere.
#[inline]
#[expect(clippy::cast_possible_truncation)]
fn h1(hash: u64) -> usize {
    // On 32-bit platforms we simply ignore the higher hash bits: the cast
    // truncates, which is exactly the intended behavior.
    hash as usize
}
64
/// Probe sequence based on triangular numbers, which is guaranteed (since our
/// table size is a power of two) to visit every group of elements exactly once.
///
/// A triangular probe has us jump by 1 more group every time. So first we
/// jump by 1 group (meaning we just continue our linear scan), then 2 groups
/// (skipping over 1 group), then 3 groups (skipping over 2 groups), and so on.
///
/// Proof that the probe will visit every group in the table:
/// <https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/>
#[derive(Clone)]
struct ProbeSeq {
    /// Current probe position (a bucket index, kept within `bucket_mask`).
    pos: usize,
    /// Current step size; grows by `Group::WIDTH` on every probe step.
    stride: usize,
}
79
80impl ProbeSeq {
81 #[inline]
82 fn move_next(&mut self, bucket_mask: usize) {
83 // We should have found an empty bucket by now and ended the probe.
84 debug_assert!(
85 self.stride <= bucket_mask,
86 "Went past end of probe sequence"
87 );
88
89 self.stride += Group::WIDTH;
90 self.pos += self.stride;
91 self.pos &= bucket_mask;
92 }
93}
94
/// Returns the number of buckets needed to hold the given number of items,
/// taking the maximum load factor into account.
///
/// Returns `None` if an overflow occurs.
///
/// This ensures that `buckets * table_layout.size >= table_layout.ctrl_align`.
// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258
#[cfg_attr(target_os = "emscripten", inline(never))]
#[cfg_attr(not(target_os = "emscripten"), inline)]
fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option<usize> {
    debug_assert_ne!(cap, 0);

    // For small tables we require at least 1 empty bucket so that lookups are
    // guaranteed to terminate if an element doesn't exist in the table.
    if cap < 15 {
        // Consider a small TableLayout like { size: 1, ctrl_align: 16 } on a
        // platform with Group::WIDTH of 16 (like x86_64 with SSE2). For small
        // bucket sizes, this ends up wasting quite a few bytes just to pad to
        // the relatively larger ctrl_align:
        //
        // | capacity | buckets | bytes allocated | bytes per item |
        // | -------- | ------- | --------------- | -------------- |
        // | 3        | 4       | 36              | (Yikes!)  12.0 |
        // | 7        | 8       | 40              | (Poor)     5.7 |
        // | 14       | 16      | 48              |            3.4 |
        // | 28       | 32      | 80              |            3.3 |
        //
        // In general, buckets * table_layout.size >= table_layout.ctrl_align
        // must be true to avoid these edges. This is implemented by adjusting
        // the minimum capacity upwards for small items. This code only needs
        // to handle ctrl_align which are less than or equal to Group::WIDTH,
        // because valid layout sizes are always a multiple of the alignment,
        // so anything with alignment over the Group::WIDTH won't hit this edge
        // case.

        // This is brittle, e.g. if we ever add 32 byte groups, it will select
        // 3 regardless of the table_layout.size.
        let min_cap = match (Group::WIDTH, table_layout.size) {
            (16, 0..=1) => 14,
            (16, 2..=3) | (8, 0..=1) => 7,
            _ => 3,
        };
        let cap = min_cap.max(cap);
        // We don't bother with a table size of 2 buckets since that can only
        // hold a single element. Instead, we skip directly to a 4 bucket table
        // which can hold 3 elements.
        let buckets = if cap < 4 {
            4
        } else if cap < 8 {
            8
        } else {
            16
        };
        // Debug-check the `buckets * size >= ctrl_align` invariant promised
        // in the doc comment above.
        ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets);
        return Some(buckets);
    }

    // Otherwise require 1/8 buckets to be empty (87.5% load)
    //
    // Be careful when modifying this, calculate_layout relies on the
    // overflow check here.
    let adjusted_cap = cap.checked_mul(8)? / 7;

    // Any overflows will have been caught by the checked_mul. Also, any
    // rounding errors from the division above will be cleaned up by
    // next_power_of_two (which can't overflow because of the previous division).
    let buckets = adjusted_cap.next_power_of_two();
    ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets);
    Some(buckets)
}
165
166// `maximum_buckets_in` relies on the property that for non-ZST `T`, any
167// chosen `buckets` will satisfy `buckets * table_layout.size >=
168// table_layout.ctrl_align`, so `calculate_layout_for` does not need to add
169// extra padding beyond `table_layout.size * buckets`. If small-table bucket
170// selection or growth policy changes, revisit `maximum_buckets_in`.
171#[inline]
172fn ensure_bucket_bytes_at_least_ctrl_align(table_layout: TableLayout, buckets: usize) {
173 if table_layout.size != 0 {
174 let prod = table_layout.size.saturating_mul(buckets);
175 debug_assert!(prod >= table_layout.ctrl_align);
176 }
177}
178
/// Returns the maximum effective capacity for the given bucket mask, taking
/// the maximum load factor into account.
///
/// Note that `bucket_mask` is one less than the bucket count.
#[inline]
fn bucket_mask_to_capacity(bucket_mask: usize) -> usize {
    match bucket_mask {
        // Tables with 1/2/4/8 buckets always reserve exactly one empty slot.
        0..=7 => bucket_mask,
        // Larger tables keep 12.5% of the slots empty (87.5% max load).
        _ => (bucket_mask + 1) / 8 * 7,
    }
}
192
/// Helper which allows the max calculation for `ctrl_align` to be statically computed for each `T`
/// while keeping the rest of `calculate_layout_for` independent of `T`.
#[derive(Copy, Clone)]
struct TableLayout {
    /// `size_of::<T>()` for the stored element type.
    size: usize,
    /// Alignment of the whole allocation: `max(align_of::<T>(), Group::WIDTH)`.
    ctrl_align: usize,
}
200
impl TableLayout {
    /// Computes the layout parameters for element type `T`.
    #[inline]
    const fn new<T>() -> Self {
        let layout = Layout::new::<T>();
        Self {
            size: layout.size(),
            // The allocation must be aligned for both `T` and the group-wide
            // control-byte loads, so take the larger of the two.
            ctrl_align: if layout.align() > Group::WIDTH {
                layout.align()
            } else {
                Group::WIDTH
            },
        }
    }

    /// Computes the allocation layout for a table with `buckets` buckets and
    /// the offset (in bytes) of the control bytes within that allocation.
    ///
    /// Returns `None` on arithmetic overflow or if the allocation would
    /// exceed `isize::MAX`.
    #[inline]
    fn calculate_layout_for(self, buckets: usize) -> Option<(Layout, usize)> {
        debug_assert!(buckets.is_power_of_two());

        let TableLayout { size, ctrl_align } = self;
        // Manual layout calculation since Layout methods are not yet stable.
        // Data bytes come first, then the control bytes rounded up to
        // `ctrl_align`; `Group::WIDTH` extra control bytes trail the table.
        let ctrl_offset =
            size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1);
        let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?;

        // We need an additional check to ensure that the allocation doesn't
        // exceed `isize::MAX` (https://github.com/rust-lang/rust/pull/95295).
        if len > isize::MAX as usize - (ctrl_align - 1) {
            return None;
        }

        Some((
            // SAFETY: `ctrl_align` is a power of two (either the type's
            // alignment or `Group::WIDTH`), and `len` rounded up to it was
            // checked against `isize::MAX` just above.
            unsafe { Layout::from_size_align_unchecked(len, ctrl_align) },
            ctrl_offset,
        ))
    }
}
237
/// A reference to a hash table bucket containing a `T`.
///
/// This is usually just a pointer to the element itself. However if the element
/// is a ZST, then we instead track the index of the element in the table so
/// that `erase` works properly.
pub(crate) struct Bucket<T> {
    // This actually points one past the element (i.e. at the "next" element)
    // rather than at the element itself; the data area grows downwards, so
    // this convention keeps the pointer arithmetic simple, whereas a direct
    // pointer to the element would complicate it.
    // `NonNull` is used for covariance and the niche layout optimization.
    ptr: NonNull<T>,
}
250
// This Send impl is needed for rayon support.
// SAFETY: this is sound because `Bucket` is never exposed in a public API,
// so all uses are internal to this crate.
unsafe impl<T> Send for Bucket<T> {}
254
// Implemented manually (rather than derived) so that `Bucket<T>` is
// cloneable even when `T: Clone` does not hold: only the pointer is copied.
impl<T> Clone for Bucket<T> {
    #[inline]
    fn clone(&self) -> Self {
        Self { ptr: self.ptr }
    }
}
261
impl<T> Bucket<T> {
    /// Creates a [`Bucket`] that contains a pointer to the data.
    /// The pointer calculation is performed by calculating the
    /// offset from the given `base` pointer (convenience for
    /// `base.as_ptr().sub(index)`).
    ///
    /// `index` is in units of `T`; e.g., an `index` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// If the `T` is a ZST, then we instead track the index of the element
    /// in the table so that `erase` works properly (return
    /// `NonNull::new_unchecked((index + 1) as *mut T)`)
    ///
    /// # Safety
    ///
    /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived
    /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and the safety
    /// rules of [`NonNull::new_unchecked`] function.
    ///
    /// Thus, in order to uphold the safety contracts for the [`<*mut T>::sub`] method
    /// and [`NonNull::new_unchecked`] function, as well as for the correct
    /// logic of the work of this crate, the following rules are necessary and
    /// sufficient:
    ///
    /// * the `base` pointer must not be `dangling` and must point to the
    ///   end of the first `value element` from the `data part` of the table, i.e.
    ///   must be the pointer that is returned by [`RawTable::data_end`] or by
    ///   [`RawTableInner::data_end<T>`];
    ///
    /// * `index` must not be greater than `RawTableInner.bucket_mask`, i.e.
    ///   `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)`
    ///   must be no greater than the number returned by the function
    ///   [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`].
    ///
    /// If `mem::size_of::<T>() == 0`, then the only requirement is that the
    /// `index` must not be greater than `RawTableInner.bucket_mask`, i.e.
    /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)`
    /// must be no greater than the number returned by the function
    /// [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`].
    #[inline]
    unsafe fn from_base_index(base: NonNull<T>, index: usize) -> Self {
        // If mem::size_of::<T>() != 0 then return a pointer to an `element` in
        // the data part of the table (we start counting from "0", so that
        // in the expression T[last], the "last" index actually is one less than the
        // "buckets" number in the table, i.e. "last = RawTableInner.bucket_mask"):
        //
        //                   `from_base_index(base, 1).as_ptr()` returns a pointer that
        //                   points here in the data part of the table
        //                   (to the start of T1)
        //                        |
        //                        |        `base: NonNull<T>` must point here
        //                        |         (to the end of T0 or to the start of C0)
        //                        v         v
        // [Padding], Tlast, ..., |T1|, T0, |C0, C1, ..., Clast
        //                           ^
        //                           `from_base_index(base, 1)` returns a pointer
        //                           that points here in the data part of the table
        //                           (to the end of T1)
        //
        // where: T0...Tlast - our stored data; C0...Clast - control bytes
        // or metadata for data.
        let ptr = if T::IS_ZERO_SIZED {
            // won't overflow because index must be less than length (bucket_mask)
            // and bucket_mask is guaranteed to be less than `isize::MAX`
            // (see TableLayout::calculate_layout_for method)
            invalid_mut(index + 1)
        } else {
            // SAFETY: the caller guarantees `base` and `index` satisfy the
            // contract of `<*mut T>::sub` (see the doc comment above).
            unsafe { base.as_ptr().sub(index) }
        };
        Self {
            // SAFETY: `ptr` is non-null: for ZSTs it is `index + 1 >= 1`,
            // otherwise it is derived from the non-null `base`.
            ptr: unsafe { NonNull::new_unchecked(ptr) },
        }
    }

    /// Calculates the index of a [`Bucket`] as the distance between two pointers
    /// (convenience for `base.as_ptr().offset_from(self.ptr.as_ptr()) as usize`).
    /// The returned value is in units of T: the distance in bytes divided by
    /// [`core::mem::size_of::<T>()`].
    ///
    /// If the `T` is a ZST, then we return the index of the element in
    /// the table so that `erase` works properly (return `self.ptr.as_ptr() as usize - 1`).
    ///
    /// This function is the inverse of [`from_base_index`].
    ///
    /// # Safety
    ///
    /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived
    /// from the safety rules for [`<*const T>::offset_from`] method of `*const T`.
    ///
    /// Thus, in order to uphold the safety contracts for [`<*const T>::offset_from`]
    /// method, as well as for the correct logic of the work of this crate, the
    /// following rules are necessary and sufficient:
    ///
    /// * the `base` contained pointer must not be `dangling` and must point to the
    ///   end of the first `element` from the `data part` of the table, i.e.
    ///   must be a pointer that is returned by [`RawTable::data_end`] or by
    ///   [`RawTableInner::data_end<T>`];
    ///
    /// * `self` also must not contain a dangling pointer;
    ///
    /// * both `self` and `base` must be created from the same [`RawTable`]
    ///   (or [`RawTableInner`]).
    ///
    /// If `mem::size_of::<T>() == 0`, this function is always safe.
    #[inline]
    unsafe fn to_base_index(&self, base: NonNull<T>) -> usize {
        // If mem::size_of::<T>() != 0 then return an index under which we used to store the
        // `element` in the data part of the table (we start counting from "0", so
        // that in the expression T[last], the "last" index actually is one less than the
        // "buckets" number in the table, i.e. "last = RawTableInner.bucket_mask").
        // For example for 5th element in table calculation is performed like this:
        //
        //                        mem::size_of::<T>()
        //                          |
        //                          |  `self = from_base_index(base, 5)` that returns pointer
        //                          |  that points here in the data part of the table
        //                          |  (to the end of T5)
        //                          |    |                    `base: NonNull<T>` must point here
        //                          v    |                    (to the end of T0 or to the start of C0)
        //                        /???\  v                     v
        // [Padding], Tlast, ..., |T10|, ..., T5|, T4, T3, T2, T1, T0, |C0, C1, C2, C3, C4, C5, ..., C10, ..., Clast
        //                                      \__________  __________/
        //                                                 \/
        //                       `bucket.to_base_index(base)` = 5
        //                       (base.as_ptr() as usize - self.ptr.as_ptr() as usize) / mem::size_of::<T>()
        //
        // where: T0...Tlast - our stored data; C0...Clast - control bytes or metadata for data.
        if T::IS_ZERO_SIZED {
            // this can not be UB: for ZSTs the "pointer" is really the
            // stored `index + 1`, so subtracting 1 recovers the index
            self.ptr.as_ptr() as usize - 1
        } else {
            // SAFETY: the caller guarantees both pointers belong to the same
            // table allocation (see the doc comment above).
            unsafe { offset_from(base.as_ptr(), self.ptr.as_ptr()) }
        }
    }

    /// Acquires the underlying raw pointer `*mut T` to `data`.
    ///
    /// # Note
    ///
    /// If `T` is not [`Copy`], do not use `*mut T` methods that can cause calling the
    /// destructor of `T` (for example the [`<*mut T>::drop_in_place`] method), because
    /// for properly dropping the data we also need to clear `data` control bytes. If we
    /// drop data, but do not clear the `data control byte` it leads to a double drop when
    /// [`RawTable`] goes out of scope.
    ///
    /// If you modify an already initialized `value`, then [`Hash`] and [`Eq`] on the new
    /// `T` value and its borrowed form *must* match those for the old `T` value, as the map
    /// will not re-evaluate where the new value should go, meaning the value may become
    /// "lost" if their location does not reflect their state.
    #[inline]
    pub(crate) fn as_ptr(&self) -> *mut T {
        if T::IS_ZERO_SIZED {
            // Just return an arbitrary ZST pointer which is properly aligned;
            // an invalid pointer is good enough for a ZST
            invalid_mut(mem::align_of::<T>())
        } else {
            // `self.ptr` points one past the element; step back to its start.
            unsafe { self.ptr.as_ptr().sub(1) }
        }
    }

    /// Acquires the underlying non-null pointer `*mut T` to `data`.
    #[inline]
    fn as_non_null(&self) -> NonNull<T> {
        // SAFETY: `self.ptr` is already a `NonNull`
        unsafe { NonNull::new_unchecked(self.as_ptr()) }
    }

    /// Create a new [`Bucket`] that is offset from the `self` by the given
    /// `offset`. The pointer calculation is performed by calculating the
    /// offset from the `self` pointer (convenience for `self.ptr.as_ptr().sub(offset)`).
    /// This function is used for iterators.
    ///
    /// `offset` is in units of `T`; e.g., an `offset` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived
    /// from the safety rules for [`<*mut T>::sub`] method of `*mut T` and safety
    /// rules of [`NonNull::new_unchecked`] function.
    ///
    /// Thus, in order to uphold the safety contracts for [`<*mut T>::sub`] method
    /// and [`NonNull::new_unchecked`] function, as well as for the correct
    /// logic of the work of this crate, the following rules are necessary and
    /// sufficient:
    ///
    /// * the `self` contained pointer must not be `dangling`;
    ///
    /// * `self.to_base_index() + offset` must not be greater than `RawTableInner.bucket_mask`,
    ///   i.e. `(self.to_base_index() + offset) <= RawTableInner.bucket_mask` or, in other
    ///   words, `self.to_base_index() + offset + 1` must be no greater than the number returned
    ///   by the function [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`].
    ///
    /// If `mem::size_of::<T>() == 0`, then the only requirement is that the
    /// `self.to_base_index() + offset` must not be greater than `RawTableInner.bucket_mask`,
    /// i.e. `(self.to_base_index() + offset) <= RawTableInner.bucket_mask` or, in other words,
    /// `self.to_base_index() + offset + 1` must be no greater than the number returned by the
    /// function [`RawTable::num_buckets`] or [`RawTableInner::num_buckets`].
    #[inline]
    unsafe fn next_n(&self, offset: usize) -> Self {
        let ptr = if T::IS_ZERO_SIZED {
            // For ZSTs the "pointer" is the stored index + 1, so moving
            // forward by `offset` elements is a plain integer addition;
            // an invalid pointer is good enough for a ZST
            invalid_mut(self.ptr.as_ptr() as usize + offset)
        } else {
            // The data area grows downwards, so moving to a higher index
            // subtracts from the pointer.
            unsafe { self.ptr.as_ptr().sub(offset) }
        };
        Self {
            // SAFETY: `ptr` stays non-null under the caller's contract above.
            ptr: unsafe { NonNull::new_unchecked(ptr) },
        }
    }

    /// Executes the destructor (if any) of the pointed-to `data`.
    ///
    /// # Safety
    ///
    /// See [`ptr::drop_in_place`] for safety concerns.
    ///
    /// You should use [`RawTable::erase`] instead of this function,
    /// or be careful with calling this function directly, because for
    /// properly dropping the data we also need to clear the `data` control bytes.
    /// If we drop data, but do not erase the `data control byte` it leads to a
    /// double drop when [`RawTable`] goes out of scope.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) unsafe fn drop(&self) {
        unsafe {
            self.as_ptr().drop_in_place();
        }
    }

    /// Reads the `value` from `self` without moving it. This leaves the
    /// memory in `self` unchanged.
    ///
    /// # Safety
    ///
    /// See [`ptr::read`] for safety concerns.
    ///
    /// You should use [`RawTable::remove`] instead of this function,
    /// or be careful with calling this function directly, because the compiler
    /// calls its destructor when the read `value` goes out of scope. It
    /// can cause double dropping when [`RawTable`] goes out of scope,
    /// because of the not-erased `data control byte`.
    #[inline]
    pub(crate) unsafe fn read(&self) -> T {
        unsafe { self.as_ptr().read() }
    }

    /// Overwrites a memory location with the given `value` without reading
    /// or dropping the old value (like the [`ptr::write`] function).
    ///
    /// # Safety
    ///
    /// See [`ptr::write`] for safety concerns.
    ///
    /// # Note
    ///
    /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match
    /// those for the old `T` value, as the map will not re-evaluate where the new
    /// value should go, meaning the value may become "lost" if their location
    /// does not reflect their state.
    #[inline]
    pub(crate) unsafe fn write(&self, val: T) {
        unsafe {
            self.as_ptr().write(val);
        }
    }

    /// Returns a shared immutable reference to the `value`.
    ///
    /// # Safety
    ///
    /// See [`NonNull::as_ref`] for safety concerns.
    #[inline]
    pub(crate) unsafe fn as_ref<'a>(&self) -> &'a T {
        unsafe { &*self.as_ptr() }
    }

    /// Returns a unique mutable reference to the `value`.
    ///
    /// # Safety
    ///
    /// See [`NonNull::as_mut`] for safety concerns.
    ///
    /// # Note
    ///
    /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match
    /// those for the old `T` value, as the map will not re-evaluate where the new
    /// value should go, meaning the value may become "lost" if their location
    /// does not reflect their state.
    #[inline]
    pub(crate) unsafe fn as_mut<'a>(&self) -> &'a mut T {
        unsafe { &mut *self.as_ptr() }
    }
}
555
/// A raw hash table with an unsafe API.
pub(crate) struct RawTable<T, A: Allocator = Global> {
    /// Type-erased table state and storage; see [`RawTableInner`].
    table: RawTableInner,
    /// Allocator used for the backing memory.
    alloc: A,
    // Tell dropck that we own instances of T.
    marker: PhantomData<T>,
}
563
/// Non-generic part of `RawTable` which allows functions to be instantiated only once regardless
/// of how many different key-value types are used.
struct RawTableInner {
    // Mask to get an index from a hash value. The value is one less than the
    // number of buckets in the table (the bucket count is a power of two).
    bucket_mask: usize,

    // Pointer into the single allocation, positioned at the boundary between
    // the (downward-growing) data area and the control bytes:
    //
    // [Padding], T_n, ..., T1, T0, C0, C1, ...
    //                             ^ points here
    ctrl: NonNull<u8>,

    // Number of elements that can be inserted before we need to grow the table
    growth_left: usize,

    // Number of elements in the table, only really used by len()
    items: usize,
}
581
impl<T> RawTable<T, Global> {
    /// Creates a new empty hash table without allocating any memory.
    ///
    /// In effect this returns a table with exactly 1 bucket. However we can
    /// leave the data pointer dangling since that bucket is never written to
    /// due to our load factor forcing us to always have at least 1 free bucket.
    #[inline]
    #[cfg_attr(feature = "rustc-dep-of-std", rustc_const_stable_indirect)]
    pub(crate) const fn new() -> Self {
        Self {
            table: RawTableInner::NEW,
            alloc: Global,
            marker: PhantomData,
        }
    }

    /// Allocates a new hash table with at least enough capacity for inserting
    /// the given number of elements without reallocating.
    ///
    /// Convenience wrapper around [`RawTable::with_capacity_in`] using the
    /// `Global` allocator.
    pub(crate) fn with_capacity(capacity: usize) -> Self {
        Self::with_capacity_in(capacity, Global)
    }
}
604
605impl<T, A: Allocator> RawTable<T, A> {
    /// Size/alignment parameters for `T`, computed once per monomorphization
    /// and shared by all allocation paths.
    const TABLE_LAYOUT: TableLayout = TableLayout::new::<T>();
607
    /// Creates a new empty hash table without allocating any memory, using the
    /// given allocator.
    ///
    /// In effect this returns a table with exactly 1 bucket. However we can
    /// leave the data pointer dangling since that bucket is never written to
    /// due to our load factor forcing us to always have at least 1 free bucket.
    #[inline]
    #[cfg_attr(feature = "rustc-dep-of-std", rustc_const_stable_indirect)]
    pub(crate) const fn new_in(alloc: A) -> Self {
        Self {
            // Shared statically-allocated empty-table state; no heap memory.
            table: RawTableInner::NEW,
            alloc,
            marker: PhantomData,
        }
    }
623
624 /// Allocates a new hash table with the given number of buckets.
625 ///
626 /// The control bytes are left uninitialized.
627 #[cfg_attr(feature = "inline-more", inline)]
628 unsafe fn new_uninitialized(
629 alloc: A,
630 buckets: usize,
631 fallibility: Fallibility,
632 ) -> Result<Self, TryReserveError> {
633 debug_assert!(buckets.is_power_of_two());
634
635 Ok(Self {
636 table: unsafe {
637 RawTableInner::new_uninitialized(&alloc, Self::TABLE_LAYOUT, buckets, fallibility)
638 }?,
639 alloc,
640 marker: PhantomData,
641 })
642 }
643
644 /// Allocates a new hash table using the given allocator, with at least enough capacity for
645 /// inserting the given number of elements without reallocating.
646 pub(crate) fn with_capacity_in(capacity: usize, alloc: A) -> Self {
647 Self {
648 table: RawTableInner::with_capacity(&alloc, Self::TABLE_LAYOUT, capacity),
649 alloc,
650 marker: PhantomData,
651 }
652 }
653
    /// Returns a reference to the underlying allocator.
    #[inline]
    pub(crate) fn allocator(&self) -> &A {
        &self.alloc
    }
659
    /// Returns a pointer to one past the last `data` element in the table as viewed from
    /// the start point of the allocation.
    ///
    /// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
    /// otherwise using it may result in [`undefined behavior`].
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    pub(crate) fn data_end(&self) -> NonNull<T> {
        //                        `self.table.ctrl.cast()` returns pointer that
        //                        points here (to the end of `T0`)
        //                          ∨
        // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
        //                           \________  ________/
        //                                    \/
        //       `n = buckets - 1`, i.e. `RawTable::num_buckets() - 1`
        //
        // where: T0...T_n  - our stored data;
        //        CT0...CT_n - control bytes or metadata for `data`.
        //        CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
        //                        with loading `Group` bytes from the heap works properly, even if the result
        //                        of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
        //                        `RawTableInner::set_ctrl` function.
        //
        // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number
        // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
        //
        // The cast is valid because the data/ctrl boundary doubles as the
        // one-past-the-end pointer of the `T` area (see the diagram).
        self.table.ctrl.cast()
    }
688
    /// Returns a pointer to the start of the data table (i.e. the lowest-addressed
    /// element slot, `data_end - num_buckets`).
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably the table must be allocated for the result to
    /// be usable — confirm against callers. `wrapping_sub` itself cannot UB,
    /// and the result is non-null because `data_end` is non-null.
    #[inline]
    #[cfg(feature = "nightly")]
    pub(crate) unsafe fn data_start(&self) -> NonNull<T> {
        unsafe { NonNull::new_unchecked(self.data_end().as_ptr().wrapping_sub(self.num_buckets())) }
    }
695
    /// Returns the total amount of memory allocated internally by the hash
    /// table, in bytes.
    ///
    /// The returned number is informational only. It is intended to be
    /// primarily used for memory profiling.
    #[inline]
    pub(crate) fn allocation_size(&self) -> usize {
        // SAFETY: We use the same `table_layout` that was used to allocate
        // this table.
        unsafe { self.table.allocation_size_or_zero(Self::TABLE_LAYOUT) }
    }
707
    /// Returns the index of a bucket from a `Bucket`.
    ///
    /// # Safety
    ///
    /// NOTE(review): `bucket` presumably must have been created from this
    /// table (see [`Bucket::to_base_index`]'s contract) — confirm at callers.
    #[inline]
    pub(crate) unsafe fn bucket_index(&self, bucket: &Bucket<T>) -> usize {
        unsafe { bucket.to_base_index(self.data_end()) }
    }
713
    /// Returns a pointer to an element in the table.
    ///
    /// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
    /// otherwise using it may result in [`undefined behavior`].
    ///
    /// # Safety
    ///
    /// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
    /// following safety rules:
    ///
    /// * The table must already be allocated;
    ///
    /// * The `index` must not be greater than the number returned by the [`RawTable::num_buckets`]
    ///   function, i.e. `(index + 1) <= self.num_buckets()`.
    ///
    /// It is safe to call this function with index of zero (`index == 0`) on a table that has
    /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
    ///
    /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
    /// not be greater than the number returned by the [`RawTable::num_buckets`] function, i.e.
    /// `(index + 1) <= self.num_buckets()`.
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    pub(crate) unsafe fn bucket(&self, index: usize) -> Bucket<T> {
        // If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
        // (we start counting from "0", so that in the expression T[n], the "n" index actually is one less than
        // the "buckets" number of our `RawTable`, i.e. "n = RawTable::num_buckets() - 1"):
        //
        //           `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
        //           part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
        //                  |
        //                  |               `base = self.data_end()` points here
        //                  |               (to the start of CT0 or to the end of T0)
        //                  v                v
        // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
        //                     ^                                        \__________  __________/
        //        `table.bucket(3)` returns a pointer that points                  \/
        //         here in the `data` part of the `RawTable` (to    additional control bytes
        //         the end of T3)                                   `m = Group::WIDTH - 1`
        //
        // where: T0...T_n  - our stored data;
        //        CT0...CT_n - control bytes or metadata for `data`;
        //        CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
        //                        the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
        //                        is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
        //
        // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number
        // of buckets is a power of two, and `self.table.bucket_mask = self.num_buckets() - 1`.
        debug_assert_ne!(self.table.bucket_mask, 0);
        debug_assert!(index < self.num_buckets());
        unsafe { Bucket::from_base_index(self.data_end(), index) }
    }
767
768 /// Erases an element from the table without dropping it.
769 #[cfg_attr(feature = "inline-more", inline)]
770 unsafe fn erase_no_drop(&mut self, item: &Bucket<T>) {
771 unsafe {
772 let index = self.bucket_index(item);
773 self.table.erase(index);
774 }
775 }
776
777 /// Erases an element from the table, dropping it in place.
778 #[cfg_attr(feature = "inline-more", inline)]
779 #[expect(clippy::needless_pass_by_value)]
780 pub(crate) unsafe fn erase(&mut self, item: Bucket<T>) {
781 unsafe {
782 // Erase the element from the table first since drop might panic.
783 self.erase_no_drop(&item);
784 item.drop();
785 }
786 }
787
788 /// Removes an element from the table, returning it.
789 ///
790 /// This also returns an index to the newly free bucket.
791 #[cfg_attr(feature = "inline-more", inline)]
792 #[expect(clippy::needless_pass_by_value)]
793 pub(crate) unsafe fn remove(&mut self, item: Bucket<T>) -> (T, usize) {
794 unsafe {
795 self.erase_no_drop(&item);
796 (item.read(), self.bucket_index(&item))
797 }
798 }
799
    /// Removes an element from the table, returning it.
    ///
    /// This also returns an index to the newly free bucket
    /// and the former `Tag` for that bucket.
    ///
    /// # Safety
    ///
    /// NOTE(review): `item` presumably must refer to a full bucket of this
    /// table — confirm against callers.
    #[cfg_attr(feature = "inline-more", inline)]
    #[expect(clippy::needless_pass_by_value)]
    pub(crate) unsafe fn remove_tagged(&mut self, item: Bucket<T>) -> (T, usize, Tag) {
        unsafe {
            let index = self.bucket_index(&item);
            // Snapshot the tag before `erase` overwrites the control byte.
            let tag = *self.table.ctrl(index);
            self.table.erase(index);
            (item.read(), index, tag)
        }
    }
814
815 /// Finds and removes an element from the table, returning it.
816 #[cfg_attr(feature = "inline-more", inline)]
817 pub(crate) fn remove_entry(&mut self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option<T> {
818 // Avoid `Option::map` because it bloats LLVM IR.
819 match self.find(hash, eq) {
820 Some(bucket) => Some(unsafe { self.remove(bucket).0 }),
821 None => None,
822 }
823 }
824
    /// Marks all table buckets as empty without dropping their contents.
    ///
    /// The backing allocation and capacity are retained; only the control
    /// bytes and item counters are reset by the inner table.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn clear_no_drop(&mut self) {
        self.table.clear_no_drop();
    }
830
    /// Removes all elements from the table without freeing the backing memory.
    ///
    /// All elements are dropped in place; the allocation is kept for reuse.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn clear(&mut self) {
        if self.is_empty() {
            // Special case empty table to avoid surprising O(capacity) time.
            return;
        }
        // Ensure that the table is reset even if one of the drops panics:
        // the guard runs `clear_no_drop` on scope exit, normal or unwinding.
        let mut self_ = guard(self, |self_| self_.clear_no_drop());
        unsafe {
            // SAFETY: ScopeGuard sets to zero the `items` field of the table
            // even in case of panic during the dropping of the elements so
            // that there will be no double drop of the elements.
            self_.table.drop_elements::<T>();
        }
    }
847
    /// Shrinks the table to fit `max(self.len(), min_size)` elements.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn shrink_to(&mut self, min_size: usize, hasher: impl Fn(&T) -> u64) {
        // Calculate the minimal number of elements that we need to reserve
        // space for.
        let min_size = usize::max(self.table.items, min_size);
        if min_size == 0 {
            // Shrinking to zero elements: swap in the static empty table and
            // tear down the old allocation (and its elements) in one step.
            let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW);
            unsafe {
                // SAFETY:
                // 1. We call the function only once;
                // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
                //    and [`TableLayout`] that were used to allocate this table.
                // 3. If any elements' drop function panics, then there will only be a memory leak,
                //    because we have replaced the inner table with a new one.
                old_inner.drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
            }
            return;
        }

        // Calculate the number of buckets that we need for this number of
        // elements. If the calculation overflows then the requested bucket
        // count must be larger than what we have right now and nothing needs
        // to be done.
        let Some(min_buckets) = capacity_to_buckets(min_size, Self::TABLE_LAYOUT) else {
            return;
        };

        // If we have more buckets than we need, shrink the table.
        if min_buckets < self.num_buckets() {
            // Fast path if the table is empty: allocate fresh instead of
            // rehashing, then drop the old allocation.
            if self.table.items == 0 {
                let new_inner =
                    RawTableInner::with_capacity(&self.alloc, Self::TABLE_LAYOUT, min_size);
                let mut old_inner = mem::replace(&mut self.table, new_inner);
                unsafe {
                    // SAFETY:
                    // 1. We call the function only once;
                    // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
                    //    and [`TableLayout`] that were used to allocate this table.
                    // 3. If any elements' drop function panics, then there will only be a memory leak,
                    //    because we have replaced the inner table with a new one.
                    old_inner.drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
                }
            } else {
                // SAFETY:
                // 1. We know for sure that `min_size >= self.table.items`.
                // 2. The [`RawTableInner`] must already have properly initialized control bytes since
                //    we will never expose RawTable::new_uninitialized in a public API.
                let result = unsafe { self.resize(min_size, hasher, Fallibility::Infallible) };

                // SAFETY: The result of calling the `resize` function cannot be an error
                // because `fallibility == Fallibility::Infallible`.
                unsafe { result.unwrap_unchecked() };
            }
        }
    }
905
    /// Ensures that at least `additional` items can be inserted into the table
    /// without reallocation.
    ///
    /// Panics or aborts on capacity overflow / allocation failure (the call
    /// is made with [`Fallibility::Infallible`]); use [`RawTable::try_reserve`]
    /// to handle such failures gracefully.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) {
        if unlikely(additional > self.table.growth_left) {
            // SAFETY: The [`RawTableInner`] must already have properly initialized control
            // bytes since we will never expose RawTable::new_uninitialized in a public API.
            let result =
                unsafe { self.reserve_rehash(additional, hasher, Fallibility::Infallible) };

            // SAFETY: All allocation errors will be caught inside `RawTableInner::reserve_rehash`.
            unsafe { result.unwrap_unchecked() };
        }
    }
920
921 /// Tries to ensure that at least `additional` items can be inserted into
922 /// the table without reallocation.
923 #[cfg_attr(feature = "inline-more", inline)]
924 pub(crate) fn try_reserve(
925 &mut self,
926 additional: usize,
927 hasher: impl Fn(&T) -> u64,
928 ) -> Result<(), TryReserveError> {
929 if additional > self.table.growth_left {
930 // SAFETY: The [`RawTableInner`] must already have properly initialized control
931 // bytes since we will never expose RawTable::new_uninitialized in a public API.
932 unsafe { self.reserve_rehash(additional, hasher, Fallibility::Fallible) }
933 } else {
934 Ok(())
935 }
936 }
937
    /// Out-of-line slow path for `reserve` and `try_reserve`.
    ///
    /// Marked `#[cold]`/`#[inline(never)]` so the fast "enough room" path in
    /// the callers stays small and inlinable.
    ///
    /// # Safety
    ///
    /// The [`RawTableInner`] must have properly initialized control bytes,
    /// otherwise calling this function results in [`undefined behavior`]
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[cold]
    #[inline(never)]
    unsafe fn reserve_rehash(
        &mut self,
        additional: usize,
        hasher: impl Fn(&T) -> u64,
        fallibility: Fallibility,
    ) -> Result<(), TryReserveError> {
        unsafe {
            // SAFETY:
            // 1. We know for sure that `alloc` and `layout` matches the [`Allocator`] and
            //    [`TableLayout`] that were used to allocate this table.
            // 2. The `drop` function is the actual drop function of the elements stored in
            //    the table.
            // 3. The caller ensures that the control bytes of the `RawTableInner`
            //    are already initialized.
            self.table.reserve_rehash_inner(
                &self.alloc,
                additional,
                &|table, index| hasher(table.bucket::<T>(index).as_ref()),
                fallibility,
                Self::TABLE_LAYOUT,
                // Only pass a destructor when `T` actually needs dropping, so
                // rehashing trivially-droppable types avoids the indirection.
                if T::NEEDS_DROP {
                    Some(|ptr| ptr::drop_in_place(ptr.cast::<T>()))
                } else {
                    None
                },
            )
        }
    }
976
    /// Allocates a new table of a different size and moves the contents of the
    /// current table into it.
    ///
    /// # Safety
    ///
    /// The [`RawTableInner`] must have properly initialized control bytes,
    /// otherwise calling this function results in [`undefined behavior`]
    ///
    /// The caller of this function must ensure that `capacity >= self.table.items`
    /// otherwise:
    ///
    /// * If `self.table.items != 0`, calling of this function with `capacity`
    ///   equal to 0 (`capacity == 0`) results in [`undefined behavior`].
    ///
    /// * If `self.table.items > capacity_to_buckets(capacity, Self::TABLE_LAYOUT)`,
    ///   this function will never return (it will loop infinitely).
    ///
    /// See [`RawTableInner::find_insert_index`] for more information.
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    unsafe fn resize(
        &mut self,
        capacity: usize,
        hasher: impl Fn(&T) -> u64,
        fallibility: Fallibility,
    ) -> Result<(), TryReserveError> {
        // SAFETY:
        // 1. The caller of this function guarantees that `capacity >= self.table.items`.
        // 2. We know for sure that `alloc` and `layout` matches the [`Allocator`] and
        //    [`TableLayout`] that were used to allocate this table.
        // 3. The caller ensures that the control bytes of the `RawTableInner`
        //    are already initialized.
        unsafe {
            self.table.resize_inner(
                &self.alloc,
                capacity,
                &|table, index| hasher(table.bucket::<T>(index).as_ref()),
                fallibility,
                Self::TABLE_LAYOUT,
            )
        }
    }
1019
    /// Inserts a new element into the table, and returns its raw bucket.
    ///
    /// This does not check if the given element already exists in the table.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket<T> {
        unsafe {
            // SAFETY:
            // 1. The [`RawTableInner`] must already have properly initialized control bytes since
            //    we will never expose `RawTable::new_uninitialized` in a public API.
            //
            // 2. We reserve additional space (if necessary) right after calling this function.
            let mut index = self.table.find_insert_index(hash);

            // We can avoid growing the table once we have reached our load factor if we are replacing
            // a tombstone. This works since the number of EMPTY slots does not change in this case.
            //
            // SAFETY: The function is guaranteed to return an index in the range `0..=self.num_buckets()`.
            let old_ctrl = *self.table.ctrl(index);
            if unlikely(self.table.growth_left == 0 && old_ctrl.special_is_empty()) {
                // The found slot is EMPTY (not a tombstone) and we are out of
                // growth budget: grow first, then search again since the
                // reserve may have rehashed everything.
                self.reserve(1, hasher);
                // SAFETY: We know for sure that `RawTableInner` has control bytes
                // initialized and that there is extra space in the table.
                index = self.table.find_insert_index(hash);
            }

            self.insert_at_index(hash, index, value)
        }
    }
1048
1049 /// Inserts a new element into the table, and returns a mutable reference to it.
1050 ///
1051 /// This does not check if the given element already exists in the table.
1052 #[cfg_attr(feature = "inline-more", inline)]
1053 pub(crate) fn insert_entry(
1054 &mut self,
1055 hash: u64,
1056 value: T,
1057 hasher: impl Fn(&T) -> u64,
1058 ) -> &mut T {
1059 unsafe { self.insert(hash, value, hasher).as_mut() }
1060 }
1061
    /// Inserts a new element into the table, without growing the table.
    ///
    /// There must be enough space in the table to insert the new element.
    ///
    /// This does not check if the given element already exists in the table.
    ///
    /// # Safety
    ///
    /// NOTE(review): the caller must guarantee at least one empty or deleted
    /// bucket exists (see "There must be enough space" above) — presumably
    /// `prepare_insert_index` probes until it finds one; confirm against its
    /// definition.
    #[cfg_attr(feature = "inline-more", inline)]
    #[cfg(feature = "rustc-internal-api")]
    pub(crate) unsafe fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket<T> {
        unsafe {
            let (index, old_ctrl) = self.table.prepare_insert_index(hash);
            let bucket = self.table.bucket(index);

            // If we are replacing a DELETED entry then we don't need to update
            // the load counter.
            self.table.growth_left -= old_ctrl.special_is_empty() as usize;

            bucket.write(value);
            self.table.items += 1;
            bucket
        }
    }
1083
    /// Temporarily removes a bucket, applying the given function to the removed
    /// element and optionally put back the returned value in the same bucket.
    ///
    /// Returns tag for bucket if the bucket is emptied out (i.e. `f` returned
    /// `None`); returns `None` if the element was written back.
    ///
    /// This does not check if the given bucket is actually occupied.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) unsafe fn replace_bucket_with<F>(&mut self, bucket: Bucket<T>, f: F) -> Option<Tag>
    where
        F: FnOnce(T) -> Option<T>,
    {
        unsafe {
            // Snapshot the control byte and growth budget so they can be
            // restored exactly if the element is put back.
            let index = self.bucket_index(&bucket);
            let old_ctrl = *self.table.ctrl(index);
            debug_assert!(self.is_bucket_full(index));
            let old_growth_left = self.table.growth_left;
            let item = self.remove(bucket).0;
            if let Some(new_item) = f(item) {
                // Undo the bookkeeping performed by `remove`: same control
                // byte, same growth budget, item count back up.
                self.table.growth_left = old_growth_left;
                self.table.set_ctrl(index, old_ctrl);
                self.table.items += 1;
                self.bucket(index).write(new_item);
                None
            } else {
                Some(old_ctrl)
            }
        }
    }
1112
    /// Searches for an element in the table. If the element is not found,
    /// returns `Err` with the position of a slot where an element with the
    /// same hash could be inserted.
    ///
    /// The `Err` index is suitable for a subsequent call to
    /// [`RawTable::insert_at_index`], provided the table is not mutated in
    /// between.
    ///
    /// This function may resize the table if additional space is required for
    /// inserting an element.
    #[inline]
    pub(crate) fn find_or_find_insert_index(
        &mut self,
        hash: u64,
        mut eq: impl FnMut(&T) -> bool,
        hasher: impl Fn(&T) -> u64,
    ) -> Result<Bucket<T>, usize> {
        // Grow up front so the inner search always has a free slot available.
        self.reserve(1, hasher);

        unsafe {
            // SAFETY:
            // 1. We know for sure that there is at least one empty `bucket` in the table.
            // 2. The [`RawTableInner`] must already have properly initialized control bytes since we will
            //    never expose `RawTable::new_uninitialized` in a public API.
            // 3. The `find_or_find_insert_index_inner` function returns the `index` of only the full bucket,
            //    which is in the range `0..self.num_buckets()` (since there is at least one empty `bucket` in
            //    the table), so calling `self.bucket(index)` and `Bucket::as_ref` is safe.
            match self
                .table
                .find_or_find_insert_index_inner(hash, &mut |index| eq(self.bucket(index).as_ref()))
            {
                // SAFETY: See explanation above.
                Ok(index) => Ok(self.bucket(index)),
                Err(index) => Err(index),
            }
        }
    }
1146
1147 /// Inserts a new element into the table at the given index with the given hash,
1148 /// and returns its raw bucket.
1149 ///
1150 /// # Safety
1151 ///
1152 /// `index` must point to a slot previously returned by
1153 /// `find_or_find_insert_index`, and no mutation of the table must have
1154 /// occurred since that call.
1155 #[inline]
1156 pub(crate) unsafe fn insert_at_index(
1157 &mut self,
1158 hash: u64,
1159 index: usize,
1160 value: T,
1161 ) -> Bucket<T> {
1162 unsafe { self.insert_tagged_at_index(Tag::full(hash), index, value) }
1163 }
1164
    /// Inserts a new element into the table at the given index with the given tag,
    /// and returns its raw bucket.
    ///
    /// # Safety
    ///
    /// `index` must point to a slot previously returned by
    /// `find_or_find_insert_index`, and no mutation of the table must have
    /// occurred since that call.
    #[inline]
    pub(crate) unsafe fn insert_tagged_at_index(
        &mut self,
        tag: Tag,
        index: usize,
        value: T,
    ) -> Bucket<T> {
        unsafe {
            // Update the control byte and table bookkeeping first, then write
            // the payload into the now-claimed bucket.
            let old_ctrl = *self.table.ctrl(index);
            self.table.record_item_insert_at(index, old_ctrl, tag);

            let bucket = self.bucket(index);
            bucket.write(value);
            bucket
        }
    }
1189
    /// Searches for an element in the table, returning its bucket if found.
    #[inline]
    pub(crate) fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
        unsafe {
            // SAFETY:
            // 1. The [`RawTableInner`] must already have properly initialized control bytes since we
            //    will never expose `RawTable::new_uninitialized` in a public API.
            // 2. The `find_inner` function returns the `index` of only the full bucket, which is in
            //    the range `0..self.num_buckets()`, so calling `self.bucket(index)` and `Bucket::as_ref`
            //    is safe.
            let result = self
                .table
                .find_inner(hash, &mut |index| eq(self.bucket(index).as_ref()));

            // Avoid `Option::map` because it bloats LLVM IR.
            match result {
                // SAFETY: See explanation above.
                Some(index) => Some(self.bucket(index)),
                None => None,
            }
        }
    }
1212
1213 /// Gets a reference to an element in the table.
1214 #[inline]
1215 pub(crate) fn get(&self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option<&T> {
1216 // Avoid `Option::map` because it bloats LLVM IR.
1217 match self.find(hash, eq) {
1218 Some(bucket) => Some(unsafe { bucket.as_ref() }),
1219 None => None,
1220 }
1221 }
1222
1223 /// Gets a mutable reference to an element in the table.
1224 #[inline]
1225 pub(crate) fn get_mut(&mut self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option<&mut T> {
1226 // Avoid `Option::map` because it bloats LLVM IR.
1227 match self.find(hash, eq) {
1228 Some(bucket) => Some(unsafe { bucket.as_mut() }),
1229 None => None,
1230 }
1231 }
1232
1233 /// Gets a reference to an element in the table at the given bucket index.
1234 #[inline]
1235 pub(crate) fn get_bucket(&self, index: usize) -> Option<&T> {
1236 unsafe {
1237 if index < self.num_buckets() && self.is_bucket_full(index) {
1238 Some(self.bucket(index).as_ref())
1239 } else {
1240 None
1241 }
1242 }
1243 }
1244
1245 /// Gets a mutable reference to an element in the table at the given bucket index.
1246 #[inline]
1247 pub(crate) fn get_bucket_mut(&mut self, index: usize) -> Option<&mut T> {
1248 unsafe {
1249 if index < self.num_buckets() && self.is_bucket_full(index) {
1250 Some(self.bucket(index).as_mut())
1251 } else {
1252 None
1253 }
1254 }
1255 }
1256
1257 /// Returns a pointer to an element in the table, but only after verifying that
1258 /// the index is in-bounds and the bucket is occupied.
1259 #[inline]
1260 pub(crate) fn checked_bucket(&self, index: usize) -> Option<Bucket<T>> {
1261 unsafe {
1262 if index < self.num_buckets() && self.is_bucket_full(index) {
1263 Some(self.bucket(index))
1264 } else {
1265 None
1266 }
1267 }
1268 }
1269
    /// Attempts to get mutable references to `N` entries in the table at once.
    ///
    /// Returns an array of length `N` with the results of each query. `None` is
    /// returned for any hash that is not found.
    ///
    /// At most one mutable reference will be returned to any entry.
    ///
    /// The `eq` argument should be a closure such that `eq(i, k)` returns true if `k` is equal to
    /// the `i`th key to be looked up.
    ///
    /// # Panics
    ///
    /// Panics if two of the queries resolve to the same entry, since handing
    /// out two `&mut` references to one element would be unsound.
    pub(crate) fn get_disjoint_mut<const N: usize>(
        &mut self,
        hashes: [u64; N],
        eq: impl FnMut(usize, &T) -> bool,
    ) -> [Option<&'_ mut T>; N] {
        unsafe {
            let ptrs = self.get_disjoint_mut_pointers(hashes, eq);

            // Reject any pointer that already appeared earlier in the array.
            for (i, cur) in ptrs.iter().enumerate() {
                if cur.is_some() && ptrs[..i].contains(cur) {
                    panic!("duplicate keys found");
                }
            }
            // All buckets are distinct from all previous buckets so we're clear to return the result
            // of the lookup.

            ptrs.map(|ptr| ptr.map(|mut ptr| ptr.as_mut()))
        }
    }
1298
    /// Attempts to get mutable references to `N` entries in the table at once,
    /// without checking that the entries are disjoint.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that no two of the queries resolve to the
    /// same entry; otherwise two aliasing `&mut` references to one element
    /// are returned, which is undefined behavior.
    pub(crate) unsafe fn get_disjoint_unchecked_mut<const N: usize>(
        &mut self,
        hashes: [u64; N],
        eq: impl FnMut(usize, &T) -> bool,
    ) -> [Option<&'_ mut T>; N] {
        let ptrs = unsafe { self.get_disjoint_mut_pointers(hashes, eq) };
        ptrs.map(|ptr| ptr.map(|mut ptr| unsafe { ptr.as_mut() }))
    }
1307
    /// Looks up each hash and returns raw pointers to the matching entries
    /// (`None` where the lookup failed).
    ///
    /// No disjointness check is performed here; callers must verify it before
    /// converting the pointers into `&mut` references.
    unsafe fn get_disjoint_mut_pointers<const N: usize>(
        &mut self,
        hashes: [u64; N],
        mut eq: impl FnMut(usize, &T) -> bool,
    ) -> [Option<NonNull<T>>; N] {
        array::from_fn(|i| {
            self.find(hashes[i], |k| eq(i, k))
                .map(|cur| cur.as_non_null())
        })
    }
1318
    /// Returns the number of elements the map can hold without reallocating.
    ///
    /// This number is a lower bound; the table might be able to hold
    /// more, but is guaranteed to be able to hold at least this many.
    ///
    /// Computed as current items plus the remaining growth budget.
    #[inline]
    pub(crate) fn capacity(&self) -> usize {
        self.table.items + self.table.growth_left
    }
1327
    /// Returns the number of elements currently stored in the table.
    #[inline]
    pub(crate) fn len(&self) -> usize {
        self.table.items
    }
1333
1334 /// Returns `true` if the table contains no elements.
1335 #[inline]
1336 pub(crate) fn is_empty(&self) -> bool {
1337 self.len() == 0
1338 }
1339
    /// Returns the number of buckets in the table.
    ///
    /// `bucket_mask` is always `buckets - 1` (the bucket count is a power of
    /// two), so adding one recovers the count.
    #[inline]
    pub(crate) fn num_buckets(&self) -> usize {
        self.table.bucket_mask + 1
    }
1345
    /// Checks whether the bucket at `index` is full (contains an element).
    ///
    /// # Safety
    ///
    /// The caller must ensure `index` is less than the number of buckets.
    #[inline]
    pub(crate) unsafe fn is_bucket_full(&self, index: usize) -> bool {
        // SAFETY: forwarded directly; the caller upholds the bounds contract.
        unsafe { self.table.is_bucket_full(index) }
    }
1355
    /// Returns an iterator over every element in the table. It is up to
    /// the caller to ensure that the `RawTable` outlives the `RawIter`.
    /// Because we cannot make the `next` method unsafe on the `RawIter`
    /// struct, we have to make the `iter` method unsafe.
    ///
    /// # Safety
    ///
    /// The returned iterator must not be used after the table is dropped or
    /// its storage is reallocated.
    #[inline]
    pub(crate) unsafe fn iter(&self) -> RawIter<T> {
        // SAFETY:
        // 1. The caller must uphold the safety contract for `iter` method.
        // 2. The [`RawTableInner`] must already have properly initialized control bytes since
        //    we will never expose RawTable::new_uninitialized in a public API.
        unsafe { self.table.iter() }
    }
1368
    /// Returns an iterator over occupied buckets that could match a given hash.
    ///
    /// `RawTable` only stores 7 bits of the hash value, so this iterator may
    /// return items that have a hash value different than the one provided. You
    /// should always validate the returned values before using them.
    ///
    /// It is up to the caller to ensure that the `RawTable` outlives the
    /// `RawIterHash`. Because we cannot make the `next` method unsafe on the
    /// `RawIterHash` struct, we have to make the `iter_hash` method unsafe.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<T> {
        // SAFETY: the caller guarantees the table outlives the iterator.
        unsafe { RawIterHash::new(self, hash) }
    }
1382
    /// Returns an iterator over occupied bucket indices that could match a given hash.
    ///
    /// `RawTable` only stores 7 bits of the hash value, so this iterator may
    /// return items that have a hash value different than the one provided. You
    /// should always validate the returned values before using them.
    ///
    /// It is up to the caller to ensure that the `RawTable` outlives the
    /// `RawIterHashIndices`. Because we cannot make the `next` method unsafe on the
    /// `RawIterHashIndices` struct, we have to make the `iter_hash_buckets` method unsafe.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) unsafe fn iter_hash_buckets(&self, hash: u64) -> RawIterHashIndices {
        // SAFETY: the caller guarantees the table outlives the iterator.
        unsafe { RawIterHashIndices::new(&self.table, hash) }
    }
1396
    /// Returns an iterator over full bucket indices in the table.
    ///
    /// # Safety
    ///
    /// See [`RawTableInner::full_buckets_indices`] for safety conditions.
    #[inline(always)]
    pub(crate) unsafe fn full_buckets_indices(&self) -> FullBucketsIndices {
        // SAFETY: forwarded; the caller upholds the inner function's contract.
        unsafe { self.table.full_buckets_indices() }
    }
1404
    /// Returns an iterator which removes all elements from the table without
    /// freeing the memory.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn drain(&mut self) -> RawDrain<'_, T, A> {
        unsafe {
            // SAFETY: the iterator covers exactly the items currently in the
            // table, and the returned `RawDrain` borrows `self`, so the table
            // outlives it.
            let iter = self.iter();
            self.drain_iter_from(iter)
        }
    }
1414
    /// Returns an iterator which removes all elements from the table without
    /// freeing the memory.
    ///
    /// Iteration starts at the provided iterator's current location.
    ///
    /// It is up to the caller to ensure that the iterator is valid for this
    /// `RawTable` and covers all items that remain in the table.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) unsafe fn drain_iter_from(&mut self, iter: RawIter<T>) -> RawDrain<'_, T, A> {
        debug_assert_eq!(iter.len(), self.len());
        RawDrain {
            iter,
            // Move the inner table into the drain and leave `self` with a
            // fresh empty table, so `self` stays consistent even if the drain
            // is leaked mid-way.
            table: mem::replace(&mut self.table, RawTableInner::NEW),
            orig_table: NonNull::from(&mut self.table),
            marker: PhantomData,
        }
    }
1432
    /// Returns an iterator which consumes all elements from the table.
    ///
    /// Iteration starts at the provided iterator's current location.
    ///
    /// It is up to the caller to ensure that the iterator is valid for this
    /// `RawTable` and covers all items that remain in the table.
    pub(crate) unsafe fn into_iter_from(self, iter: RawIter<T>) -> RawIntoIter<T, A> {
        debug_assert_eq!(iter.len(), self.len());

        // Take ownership of the raw allocation (and allocator); the iterator
        // is now responsible for dropping the remaining elements.
        let allocation = self.into_allocation();
        RawIntoIter {
            iter,
            allocation,
            marker: PhantomData,
        }
    }
1449
    /// Converts the table into a raw allocation. The contents of the table
    /// should be dropped using a `RawIter` before freeing the allocation.
    ///
    /// Returns `None` for the unallocated empty-singleton table; otherwise
    /// the base pointer, layout, and allocator needed to free the memory.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn into_allocation(self) -> Option<(NonNull<u8>, Layout, A)> {
        let alloc = if self.table.is_empty_singleton() {
            // No memory was ever allocated for the static empty table.
            None
        } else {
            let (layout, ctrl_offset) = {
                let option = Self::TABLE_LAYOUT.calculate_layout_for(self.table.num_buckets());
                // SAFETY: the table is allocated, so this exact layout
                // computation must have succeeded when it was created.
                unsafe { option.unwrap_unchecked() }
            };
            Some((
                // SAFETY: `ctrl` points `ctrl_offset` bytes past the start of
                // the allocation, so subtracting recovers the base pointer,
                // which is non-null.
                unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset).cast()) },
                layout,
                // SAFETY: `self` is forgotten below, so the allocator is
                // moved out rather than duplicated.
                unsafe { ptr::read(&raw const self.alloc) },
            ))
        };
        // Prevent `Drop` from freeing the allocation we just handed out.
        mem::forget(self);
        alloc
    }
1470}
1471
// SAFETY: `RawTable` owns its elements and its allocator, so transferring the
// table to another thread transfers exactly the `T`s and the `A`.
unsafe impl<T, A: Allocator> Send for RawTable<T, A>
where
    T: Send,
    A: Send,
{
}
// SAFETY: shared access to the table only yields shared access to the
// elements and allocator, so `T: Sync + A: Sync` suffices.
unsafe impl<T, A: Allocator> Sync for RawTable<T, A>
where
    T: Sync,
    A: Sync,
{
}
1484
impl RawTableInner {
    // Constant form of `new`, usable with `mem::replace` in const-friendly
    // and expression positions.
    const NEW: Self = RawTableInner::new();

    /// Creates a new empty hash table without allocating any memory.
    ///
    /// In effect this returns a table with exactly 1 bucket. However we can
    /// leave the data pointer dangling since that bucket is never accessed
    /// due to our load factor forcing us to always have at least 1 free bucket.
    #[inline]
    const fn new() -> Self {
        Self {
            // Be careful to cast the entire slice to a raw pointer.
            // SAFETY: the pointer to the static empty control-byte group is
            // never null.
            ctrl: unsafe {
                NonNull::new_unchecked(Group::static_empty().as_ptr().cast_mut().cast())
            },
            bucket_mask: 0,
            items: 0,
            growth_left: 0,
        }
    }
}
1506
/// Find the previous power of 2. If it's already a power of 2, it's unchanged.
/// Passing zero is undefined behavior.
pub(crate) fn prev_pow2(z: usize) -> usize {
    // Keep only the most significant set bit: shift a 1 up to its position.
    1 << (usize::BITS as usize - 1 - z.leading_zeros() as usize)
}
1513
/// Finds the largest number of buckets that can fit in `allocation_size`
/// provided the given TableLayout.
///
/// The result is always a power of two (via `prev_pow2`).
///
/// This relies on some invariants of `capacity_to_buckets`, so only feed in
/// an `allocation_size` calculated from `capacity_to_buckets`.
fn maximum_buckets_in(
    allocation_size: usize,
    table_layout: TableLayout,
    group_width: usize,
) -> usize {
    // Given an equation like:
    //   z >= x * y + x + g
    // x can be maximized by doing:
    //   x = (z - g) / (y + 1)
    // If you squint:
    //   x is the number of buckets
    //   y is the table_layout.size
    //   z is the size of the allocation
    //   g is the group width
    // But this is ignoring the padding needed for ctrl_align.
    // If we remember these restrictions:
    //   x is always a power of 2
    //   Layout size for T must always be a multiple of T
    // Then the alignment can be ignored if we add the constraint:
    //   x * y >= table_layout.ctrl_align
    // This is taken care of by `capacity_to_buckets`.
    // It may be helpful to understand this if you remember that:
    //   ctrl_offset = align(x * y, ctrl_align)
    let x = (allocation_size - group_width) / (table_layout.size + 1);
    // Round down to a power of two, as the bucket count must be one.
    prev_pow2(x)
}
1545
1546impl RawTableInner {
    /// Allocates a new [`RawTableInner`] with the given number of buckets.
    /// The control bytes and buckets are left uninitialized.
    ///
    /// If the allocator hands back more memory than requested, the extra
    /// space is used to grow the bucket count (still a power of two).
    ///
    /// # Safety
    ///
    /// The caller of this function must ensure that the `buckets` is power of two
    /// and also initialize all control bytes of the length `self.bucket_mask + 1 +
    /// Group::WIDTH` with the [`Tag::EMPTY`] bytes.
    ///
    /// See also [`Allocator`] API for other safety concerns.
    ///
    /// [`Allocator`]: stdalloc::alloc::Allocator
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new_uninitialized<A>(
        alloc: &A,
        table_layout: TableLayout,
        mut buckets: usize,
        fallibility: Fallibility,
    ) -> Result<Self, TryReserveError>
    where
        A: Allocator,
    {
        debug_assert!(buckets.is_power_of_two());

        // Avoid `Option::ok_or_else` because it bloats LLVM IR.
        let Some((layout, mut ctrl_offset)) = table_layout.calculate_layout_for(buckets) else {
            return Err(fallibility.capacity_overflow());
        };

        let ptr: NonNull<u8> = match do_alloc(alloc, layout) {
            Ok(block) => {
                // The allocator can't return a value smaller than was
                // requested, so this can be != instead of >=.
                if block.len() != layout.size() {
                    // Utilize over-sized allocations.
                    let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH);
                    debug_assert!(x >= buckets);
                    // Calculate the new ctrl_offset.
                    let (oversized_layout, oversized_ctrl_offset) = {
                        let option = table_layout.calculate_layout_for(x);
                        // SAFETY: `x >= buckets` and the layout for `buckets`
                        // fit, so the layout for `x` fits in `block.len()` and
                        // cannot overflow.
                        unsafe { option.unwrap_unchecked() }
                    };
                    debug_assert!(oversized_layout.size() <= block.len());
                    debug_assert!(oversized_ctrl_offset >= ctrl_offset);
                    ctrl_offset = oversized_ctrl_offset;
                    buckets = x;
                }

                block.cast()
            }
            Err(_) => return Err(fallibility.alloc_err(layout)),
        };

        // SAFETY: `ptr` is non-null (it came from a successful allocation)
        // and `ctrl_offset` lies within the allocated block by construction
        // of `layout`, so the offset pointer is also non-null.
        let ctrl = unsafe { NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)) };
        Ok(Self {
            ctrl,
            bucket_mask: buckets - 1,
            items: 0,
            growth_left: bucket_mask_to_capacity(buckets - 1),
        })
    }
1609
    /// Attempts to allocate a new [`RawTableInner`] with at least enough
    /// capacity for inserting the given number of elements without reallocating.
    ///
    /// A `capacity` of zero returns the unallocated empty-singleton table.
    ///
    /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes.
    #[inline]
    fn fallible_with_capacity<A>(
        alloc: &A,
        table_layout: TableLayout,
        capacity: usize,
        fallibility: Fallibility,
    ) -> Result<Self, TryReserveError>
    where
        A: Allocator,
    {
        if capacity == 0 {
            Ok(Self::NEW)
        } else {
            // SAFETY: We checked that we could successfully allocate the new table, and then
            // initialized all control bytes with the constant `Tag::EMPTY` byte.
            unsafe {
                // `capacity_to_buckets` yields a power of two, as required by
                // `new_uninitialized`; `None` means the request overflows.
                let buckets = capacity_to_buckets(capacity, table_layout)
                    .ok_or_else(|| fallibility.capacity_overflow())?;

                let mut result =
                    Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?;
                // SAFETY: We checked that the table is allocated and therefore the table already has
                // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for)
                // so writing `self.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe.
                result.ctrl_slice().fill_empty();

                Ok(result)
            }
        }
    }
1644
    /// Allocates a new [`RawTableInner`] with at least enough capacity for inserting
    /// the given number of elements without reallocating.
    ///
    /// Panics if the new capacity exceeds [`isize::MAX`] bytes and [`abort`] the program
    /// in case of allocation error. Use [`fallible_with_capacity`] instead if you want to
    /// handle memory allocation failure.
    ///
    /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes.
    ///
    /// [`fallible_with_capacity`]: RawTableInner::fallible_with_capacity
    /// [`abort`]: stdalloc::alloc::handle_alloc_error
    fn with_capacity<A>(alloc: &A, table_layout: TableLayout, capacity: usize) -> Self
    where
        A: Allocator,
    {
        let result =
            Self::fallible_with_capacity(alloc, table_layout, capacity, Fallibility::Infallible);

        // SAFETY: All allocation errors will be caught inside `RawTableInner::new_uninitialized`,
        // because `Fallibility::Infallible` panics/aborts instead of returning `Err`.
        unsafe { result.unwrap_unchecked() }
    }
1666
    /// Fixes up an insertion index returned by the [`RawTableInner::find_insert_index_in_group`] method.
    ///
    /// In tables smaller than the group width (`self.num_buckets() < Group::WIDTH`), trailing control
    /// bytes outside the range of the table are filled with [`Tag::EMPTY`] entries. These will unfortunately
    /// trigger a match of [`RawTableInner::find_insert_index_in_group`] function. This is because
    /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking
    /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied.
    /// We detect this situation here and perform a second scan starting at the beginning of the table.
    /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the
    /// trailing control bytes (containing [`Tag::EMPTY`] bytes).
    ///
    /// If this function is called correctly, it is guaranteed to return an index of an empty or
    /// deleted bucket in the range `0..self.num_buckets()` (see `Warning` and `Safety`).
    ///
    /// # Warning
    ///
    /// The table must have at least 1 empty or deleted `bucket`, otherwise if the table is less than
    /// the group width (`self.num_buckets() < Group::WIDTH`) this function returns an index outside of the
    /// table indices range `0..self.num_buckets()` (`0..=self.bucket_mask`). Attempt to write data at that
    /// index will cause immediate [`undefined behavior`].
    ///
    /// # Safety
    ///
    /// The safety rules are directly derived from the safety rules for [`RawTableInner::ctrl`] method.
    /// Thus, in order to uphold those safety contracts, as well as for the correct logic of the work
    /// of this crate, the following rules are necessary and sufficient:
    ///
    /// * The [`RawTableInner`] must have properly initialized control bytes otherwise calling this
    /// function results in [`undefined behavior`].
    ///
    /// * This function must only be used on insertion indices found by [`RawTableInner::find_insert_index_in_group`]
    /// (after the `find_insert_index_in_group` function, but before insertion into the table).
    ///
    /// * The `index` must not be greater than the `self.bucket_mask`, i.e. `(index + 1) <= self.num_buckets()`
    /// (this one is provided by the [`RawTableInner::find_insert_index_in_group`] function).
    ///
    /// Calling this function with an index not provided by [`RawTableInner::find_insert_index_in_group`]
    /// may result in [`undefined behavior`] even if the index satisfies the safety rules of the
    /// [`RawTableInner::ctrl`] function (`index < self.bucket_mask + 1 + Group::WIDTH`).
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn fix_insert_index(&self, mut index: usize) -> usize {
        // SAFETY: The caller of this function ensures that `index` is in the range `0..=self.bucket_mask`.
        if unlikely(unsafe { self.is_bucket_full(index) }) {
            debug_assert!(self.bucket_mask < Group::WIDTH);
            // SAFETY:
            //
            // * Since the caller of this function ensures that the control bytes are properly
            // initialized and `ptr = self.ctrl(0)` points to the start of the array of control
            // bytes, therefore: `ctrl` is valid for reads, properly aligned to `Group::WIDTH`
            // and points to the properly initialized control bytes (see also
            // `TableLayout::calculate_layout_for` and `ptr::read`);
            //
            // * Because the caller of this function ensures that the index was provided by the
            // `self.find_insert_index_in_group()` function, so for tables larger than the
            // group width (self.num_buckets() >= Group::WIDTH), we will never end up in the given
            // branch, since `(probe_seq.pos + bit) & self.bucket_mask` in `find_insert_index_in_group`
            // cannot return a full bucket index. For tables smaller than the group width, calling
            // the `unwrap_unchecked` function is also safe, as the trailing control bytes outside
            // the range of the table are filled with EMPTY bytes (and we know for sure that there
            // is at least one FULL bucket), so this second scan either finds an empty slot (due to
            // the load factor) or hits the trailing control bytes (containing EMPTY).
            index = unsafe {
                Group::load_aligned(self.ctrl(0))
                    .match_empty_or_deleted()
                    .lowest_set_bit()
                    .unwrap_unchecked()
            };
        }
        index
    }
1739
1740 /// Finds the position to insert something in a group.
1741 ///
1742 /// **This may have false positives and must be fixed up with `fix_insert_index`
1743 /// before it's used.**
1744 ///
1745 /// The function is guaranteed to return the index of an empty or deleted [`Bucket`]
1746 /// in the range `0..self.num_buckets()` (`0..=self.bucket_mask`).
1747 #[inline]
1748 fn find_insert_index_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
1749 let bit = group.match_empty_or_deleted().lowest_set_bit();
1750
1751 if likely(bit.is_some()) {
1752 // This is the same as `(probe_seq.pos + bit) % self.num_buckets()` because the number
1753 // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
1754 Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask)
1755 } else {
1756 None
1757 }
1758 }
1759
    /// Searches for an element in the table, or a potential slot where that element could
    /// be inserted (an empty or deleted [`Bucket`] index).
    ///
    /// This uses dynamic dispatch to reduce the amount of code generated, but that is
    /// eliminated by LLVM optimizations.
    ///
    /// This function does not make any changes to the `data` part of the table, or any
    /// changes to the `items` or `growth_left` field of the table.
    ///
    /// The table must have at least 1 empty or deleted `bucket`, otherwise, if the
    /// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`, this function
    /// will never return (will go into an infinite loop) for tables larger than the group
    /// width, or return an index outside of the table indices range if the table is less
    /// than the group width.
    ///
    /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool`
    /// function with only `FULL` buckets' indices and return the `index` of the found
    /// element (as `Ok(index)`). If the element is not found and there is at least 1
    /// empty or deleted [`Bucket`] in the table, the function is guaranteed to return
    /// an index in the range `0..self.num_buckets()`, but in any case, if this function
    /// returns `Err`, it will contain an index in the range `0..=self.num_buckets()`.
    ///
    /// # Safety
    ///
    /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling
    /// this function results in [`undefined behavior`].
    ///
    /// Attempt to write data at the index returned by this function when the table is less than
    /// the group width and if there was not at least one empty or deleted bucket in the table
    /// will cause immediate [`undefined behavior`]. This is because in this case the function
    /// will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] control
    /// bytes outside the table range.
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn find_or_find_insert_index_inner(
        &self,
        hash: u64,
        eq: &mut dyn FnMut(usize) -> bool,
    ) -> Result<usize, usize> {
        let mut insert_index = None;

        let tag_hash = Tag::full(hash);
        let mut probe_seq = self.probe_seq(hash);

        loop {
            // SAFETY:
            // * Caller of this function ensures that the control bytes are properly initialized.
            //
            // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1`
            //   of the table due to masking with `self.bucket_mask` and also because the number
            //   of buckets is a power of two (see `self.probe_seq` function).
            //
            // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to
            //   call `Group::load` due to the extended control bytes range, which is
            //   `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control
            //   byte will never be read for the allocated table);
            //
            // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
            //   always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
            //   bytes, which is safe (see RawTableInner::new).
            let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };

            for bit in group.match_tag(tag_hash) {
                let index = (probe_seq.pos + bit) & self.bucket_mask;

                if likely(eq(index)) {
                    return Ok(index);
                }
            }

            // We didn't find the element we were looking for in the group, try to get an
            // insertion slot from the group if we don't have one yet.
            if likely(insert_index.is_none()) {
                insert_index = self.find_insert_index_in_group(&group, &probe_seq);
            }

            if let Some(insert_index) = insert_index {
                // Only stop the search if the group contains at least one empty element.
                // Otherwise, the element that we are looking for might be in a following group.
                if likely(group.match_empty().any_bit_set()) {
                    // We must have found an insert slot by now, since the current group contains at
                    // least one. For tables smaller than the group width, there will still be an
                    // empty element in the current (and only) group due to the load factor.
                    unsafe {
                        // SAFETY:
                        // * Caller of this function ensures that the control bytes are properly initialized.
                        //
                        // * We use this function with the index found by `self.find_insert_index_in_group`
                        return Err(self.fix_insert_index(insert_index));
                    }
                }
            }

            probe_seq.move_next(self.bucket_mask);
        }
    }
1857
    /// Searches for an empty or deleted bucket which is suitable for inserting a new
    /// element and sets the hash for that slot. Returns an index of that slot and the
    /// old control byte stored in the found index.
    ///
    /// This function does not check if the given element exists in the table. Also,
    /// this function does not check if there is enough space in the table to insert
    /// a new element. The caller of the function must make sure that the table has at
    /// least 1 empty or deleted `bucket`, otherwise this function will never return
    /// (will go into an infinite loop) for tables larger than the group width, or
    /// return an index outside of the table indices range if the table is less than
    /// the group width.
    ///
    /// If there is at least 1 empty or deleted `bucket` in the table, the function is
    /// guaranteed to return an `index` in the range `0..self.num_buckets()`, but in any case,
    /// if this function returns an `index` it will be in the range `0..=self.num_buckets()`.
    ///
    /// This function does not make any changes to the `data` parts of the table,
    /// or any changes to the `items` or `growth_left` field of the table.
    ///
    /// # Safety
    ///
    /// The safety rules are directly derived from the safety rules for the
    /// [`RawTableInner::set_ctrl_hash`] and [`RawTableInner::find_insert_index`] methods.
    /// Thus, in order to uphold the safety contracts for those methods, as well as for
    /// the correct logic of the work of this crate, you must observe the following rules
    /// when calling this function:
    ///
    /// * The [`RawTableInner`] has already been allocated and has properly initialized
    /// control bytes otherwise calling this function results in [`undefined behavior`].
    ///
    /// * The caller of this function must ensure that the "data" parts of the table
    /// will have an entry in the returned index (matching the given hash) right
    /// after calling this function.
    ///
    /// Attempt to write data at the `index` returned by this function when the table is
    /// less than the group width and if there was not at least one empty or deleted bucket in
    /// the table will cause immediate [`undefined behavior`]. This is because in this case the
    /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`]
    /// control bytes outside the table range.
    ///
    /// The caller must independently increase the `items` field of the table, and also,
    /// if the old control byte was [`Tag::EMPTY`], then decrease the table's `growth_left`
    /// field, and do not change it if the old control byte was [`Tag::DELETED`].
    ///
    /// See also [`Bucket::as_ptr`] method, for more information about of properly removing
    /// or saving `element` from / into the [`RawTable`] / [`RawTableInner`].
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn prepare_insert_index(&mut self, hash: u64) -> (usize, Tag) {
        unsafe {
            // SAFETY: Caller of this function ensures that the control bytes are properly initialized.
            let index: usize = self.find_insert_index(hash);
            // SAFETY:
            // 1. The `find_insert_index` function either returns an `index` less than or
            //    equal to `self.num_buckets() = self.bucket_mask + 1` of the table, or never
            //    returns if it cannot find an empty or deleted slot.
            // 2. The caller of this function guarantees that the table has already been
            //    allocated
            let old_ctrl = *self.ctrl(index);
            self.set_ctrl_hash(index, hash);
            (index, old_ctrl)
        }
    }
1922
    /// Searches for an empty or deleted bucket which is suitable for inserting
    /// a new element, returning the `index` for the new [`Bucket`].
    ///
    /// This function does not make any changes to the `data` part of the table, or any
    /// changes to the `items` or `growth_left` field of the table.
    ///
    /// The table must have at least 1 empty or deleted `bucket`, otherwise this function
    /// will never return (will go into an infinite loop) for tables larger than the group
    /// width, or return an index outside of the table indices range if the table is less
    /// than the group width.
    ///
    /// If there is at least 1 empty or deleted `bucket` in the table, the function is
    /// guaranteed to return an index in the range `0..self.num_buckets()`, but in any case,
    /// the returned index will be in the range `0..=self.num_buckets()`.
    ///
    /// # Safety
    ///
    /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling
    /// this function results in [`undefined behavior`].
    ///
    /// Attempt to write data at the index returned by this function when the table is
    /// less than the group width and if there was not at least one empty or deleted bucket in
    /// the table will cause immediate [`undefined behavior`]. This is because in this case the
    /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`]
    /// control bytes outside the table range.
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn find_insert_index(&self, hash: u64) -> usize {
        let mut probe_seq = self.probe_seq(hash);
        loop {
            // SAFETY:
            // * Caller of this function ensures that the control bytes are properly initialized.
            //
            // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1`
            //   of the table due to masking with `self.bucket_mask` and also because the number
            //   of buckets is a power of two (see `self.probe_seq` function).
            //
            // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to
            //   call `Group::load` due to the extended control bytes range, which is
            //   `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control
            //   byte will never be read for the allocated table);
            //
            // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
            //   always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
            //   bytes, which is safe (see RawTableInner::new).
            let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };

            let index = self.find_insert_index_in_group(&group, &probe_seq);
            if likely(index.is_some()) {
                // SAFETY:
                // * Caller of this function ensures that the control bytes are properly initialized.
                //
                // * We use this function with the slot / index found by `self.find_insert_index_in_group`
                unsafe {
                    return self.fix_insert_index(index.unwrap_unchecked());
                }
            }
            probe_seq.move_next(self.bucket_mask);
        }
    }
1984
    /// Searches for an element in a table, returning the `index` of the found element.
    /// This uses dynamic dispatch to reduce the amount of code generated, but it is
    /// eliminated by LLVM optimizations.
    ///
    /// This function does not make any changes to the `data` part of the table, or any
    /// changes to the `items` or `growth_left` field of the table.
    ///
    /// The table must have at least 1 empty `bucket`, otherwise, if the
    /// `eq: &mut dyn FnMut(usize) -> bool` function does not return `true`,
    /// this function will also never return (will go into an infinite loop).
    ///
    /// This function is guaranteed to provide the `eq: &mut dyn FnMut(usize) -> bool`
    /// function with only `FULL` buckets' indices and return the `index` of the found
    /// element as `Some(index)`, so the index will always be in the range
    /// `0..self.num_buckets()`.
    ///
    /// # Safety
    ///
    /// The [`RawTableInner`] must have properly initialized control bytes otherwise calling
    /// this function results in [`undefined behavior`].
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline(always)]
    unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option<usize> {
        let tag_hash = Tag::full(hash);
        let mut probe_seq = self.probe_seq(hash);

        loop {
            // SAFETY:
            // * Caller of this function ensures that the control bytes are properly initialized.
            //
            // * `ProbeSeq.pos` cannot be greater than `self.bucket_mask = self.num_buckets() - 1`
            //   of the table due to masking with `self.bucket_mask`.
            //
            // * Even if `ProbeSeq.pos` returns `position == self.bucket_mask`, it is safe to
            //   call `Group::load` due to the extended control bytes range, which is
            //   `self.bucket_mask + 1 + Group::WIDTH` (in fact, this means that the last control
            //   byte will never be read for the allocated table);
            //
            // * Also, even if `RawTableInner` is not already allocated, `ProbeSeq.pos` will
            //   always return "0" (zero), so Group::load will read unaligned `Group::static_empty()`
            //   bytes, which is safe (see RawTableInner::new_in).
            let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };

            for bit in group.match_tag(tag_hash) {
                // This is the same as `(probe_seq.pos + bit) % self.num_buckets()` because the number
                // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
                let index = (probe_seq.pos + bit) & self.bucket_mask;

                if likely(eq(index)) {
                    return Some(index);
                }
            }

            // An empty byte in this group means the element cannot live in any later
            // group: an insertion of it would have stopped at this empty slot instead
            // of probing further (see `find_or_find_insert_index_inner`).
            if likely(group.match_empty().any_bit_set()) {
                return None;
            }

            probe_seq.move_next(self.bucket_mask);
        }
    }
2046
    /// Prepares for rehashing data in place (that is, without allocating new memory).
    /// Converts all full index `control bytes` to `Tag::DELETED` and all `Tag::DELETED` control
    /// bytes to `Tag::EMPTY`, i.e. performs the following conversion:
    ///
    /// - `Tag::EMPTY` control bytes -> `Tag::EMPTY`;
    /// - `Tag::DELETED` control bytes -> `Tag::EMPTY`;
    /// - `FULL` control bytes -> `Tag::DELETED`.
    ///
    /// This function does not make any changes to the `data` parts of the table,
    /// or any changes to the `items` or `growth_left` field of the table.
    ///
    /// # Safety
    ///
    /// You must observe the following safety rules when calling this function:
    ///
    /// * The [`RawTableInner`] has already been allocated;
    ///
    /// * The caller of this function must convert the `Tag::DELETED` bytes back to `FULL`
    /// bytes when re-inserting them into their ideal position (which was impossible
    /// to do during the first insert due to tombstones). If the caller does not do
    /// this, then calling this function may result in a memory leak.
    ///
    /// * The [`RawTableInner`] must have properly initialized control bytes otherwise
    /// calling this function results in [`undefined behavior`].
    ///
    /// Calling this function on a table that has not been allocated results in
    /// [`undefined behavior`].
    ///
    /// See also [`Bucket::as_ptr`] method, for more information about of properly removing
    /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn prepare_rehash_in_place(&mut self) {
        // Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY.
        // This effectively frees up all buckets containing a DELETED entry.
        //
        // SAFETY:
        // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`;
        // 2. Even if `i` will be `i == self.bucket_mask`, it is safe to call `Group::load_aligned`
        //    due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`;
        // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated;
        // 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0
        //    and go to the end with a step equal to `Group::WIDTH` (see TableLayout::calculate_layout_for).
        unsafe {
            for i in (0..self.num_buckets()).step_by(Group::WIDTH) {
                let group = Group::load_aligned(self.ctrl(i));
                let group = group.convert_special_to_empty_and_full_to_deleted();
                group.store_aligned(self.ctrl(i));
            }
        }

        // Fix up the trailing control bytes. See the comments in set_ctrl
        // for the handling of tables smaller than the group width.
        if unlikely(self.num_buckets() < Group::WIDTH) {
            // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes,
            // so copying `self.num_buckets() == self.bucket_mask + 1` bytes with offset equal to
            // `Group::WIDTH` is safe
            unsafe {
                self.ctrl(0)
                    .copy_to(self.ctrl(Group::WIDTH), self.num_buckets());
            }
        } else {
            // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of
            // control bytes, so copying `Group::WIDTH` bytes with offset equal
            // to `self.num_buckets() == self.bucket_mask + 1` is safe
            unsafe {
                self.ctrl(0)
                    .copy_to(self.ctrl(self.num_buckets()), Group::WIDTH);
            }
        }
    }
2119
    /// Returns an iterator over every element in the table.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result
    /// is [`undefined behavior`]:
    ///
    /// * The caller has to ensure that the `RawTableInner` outlives the
    /// `RawIter`. Because we cannot make the `next` method unsafe on
    /// the `RawIter` struct, we have to make the `iter` method unsafe.
    ///
    /// * The [`RawTableInner`] must have properly initialized control bytes.
    ///
    /// The type `T` must be the actual type of the elements stored in the table,
    /// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn iter<T>(&self) -> RawIter<T> {
        // SAFETY:
        // 1. Since the caller of this function ensures that the control bytes
        //    are properly initialized and `self.data_end()` points to the start
        //    of the array of control bytes, therefore: `ctrl` is valid for reads,
        //    properly aligned to `Group::WIDTH` and points to the properly initialized
        //    control bytes.
        // 2. `data` bucket index in the table is equal to the `ctrl` index (i.e.
        //    equal to zero).
        // 3. We pass the exact value of buckets of the table to the function.
        //
        //                         `ctrl` points here (to the start
        //                         of the first control byte `CT0`)
        //                          ∨
        // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
        //                           \________  ________/
        //                                    \/
        //       `n = buckets - 1`, i.e. `RawTableInner::num_buckets() - 1`
        //
        // where: T0...T_n  - our stored data;
        //        CT0...CT_n - control bytes or metadata for `data`.
        //        CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
        //                        with loading `Group` bytes from the heap works properly, even if the result
        //                        of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
        //                        `RawTableInner::set_ctrl` function.
        //
        // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number
        // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
        unsafe {
            // `data` is the bucket with index 0, counting back from `data_end()`
            // (the data part grows downwards from the control bytes, see diagram above).
            let data = Bucket::from_base_index(self.data_end(), 0);
            RawIter {
                // SAFETY: See explanation above
                iter: RawIterRange::new(self.ctrl.as_ptr(), data, self.num_buckets()),
                items: self.items,
            }
        }
    }
2175
2176 /// Executes the destructors (if any) of the values stored in the table.
2177 ///
2178 /// # Note
2179 ///
2180 /// This function does not erase the control bytes of the table and does
2181 /// not make any changes to the `items` or `growth_left` fields of the
2182 /// table. If necessary, the caller of this function must manually set
2183 /// up these table fields, for example using the [`clear_no_drop`] function.
2184 ///
2185 /// Be careful during calling this function, because drop function of
2186 /// the elements can panic, and this can leave table in an inconsistent
2187 /// state.
2188 ///
2189 /// # Safety
2190 ///
2191 /// The type `T` must be the actual type of the elements stored in the table,
2192 /// otherwise calling this function may result in [`undefined behavior`].
2193 ///
2194 /// If `T` is a type that should be dropped and **the table is not empty**,
2195 /// calling this function more than once results in [`undefined behavior`].
2196 ///
2197 /// If `T` is not [`Copy`], attempting to use values stored in the table after
2198 /// calling this function may result in [`undefined behavior`].
2199 ///
2200 /// It is safe to call this function on a table that has not been allocated,
2201 /// on a table with uninitialized control bytes, and on a table with no actual
2202 /// data but with `Full` control bytes if `self.items == 0`.
2203 ///
2204 /// See also [`Bucket::drop`] / [`Bucket::as_ptr`] methods, for more information
2205 /// about of properly removing or saving `element` from / into the [`RawTable`] /
2206 /// [`RawTableInner`].
2207 ///
2208 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2209 unsafe fn drop_elements<T>(&mut self) {
2210 // Check that `self.items != 0`. Protects against the possibility
2211 // of creating an iterator on an table with uninitialized control bytes.
2212 if T::NEEDS_DROP && self.items != 0 {
2213 // SAFETY: We know for sure that RawTableInner will outlive the
2214 // returned `RawIter` iterator, and the caller of this function
2215 // must uphold the safety contract for `drop_elements` method.
2216 unsafe {
2217 for item in self.iter::<T>() {
2218 // SAFETY: The caller must uphold the safety contract for
2219 // `drop_elements` method.
2220 item.drop();
2221 }
2222 }
2223 }
2224 }
2225
2226 /// Executes the destructors (if any) of the values stored in the table and than
2227 /// deallocates the table.
2228 ///
2229 /// # Note
2230 ///
2231 /// Calling this function automatically makes invalid (dangling) all instances of
2232 /// buckets ([`Bucket`]) and makes invalid (dangling) the `ctrl` field of the table.
2233 ///
2234 /// This function does not make any changes to the `bucket_mask`, `items` or `growth_left`
2235 /// fields of the table. If necessary, the caller of this function must manually set
2236 /// up these table fields.
2237 ///
2238 /// # Safety
2239 ///
2240 /// If any of the following conditions are violated, the result is [`undefined behavior`]:
2241 ///
2242 /// * Calling this function more than once;
2243 ///
2244 /// * The type `T` must be the actual type of the elements stored in the table.
2245 ///
2246 /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
2247 /// to allocate this table.
2248 ///
2249 /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that
2250 /// was used to allocate this table.
2251 ///
2252 /// The caller of this function should pay attention to the possibility of the
2253 /// elements' drop function panicking, because this:
2254 ///
2255 /// * May leave the table in an inconsistent state;
2256 ///
2257 /// * Memory is never deallocated, so a memory leak may occur.
2258 ///
2259 /// Attempt to use the `ctrl` field of the table (dereference) after calling this
2260 /// function results in [`undefined behavior`].
2261 ///
2262 /// It is safe to call this function on a table that has not been allocated,
2263 /// on a table with uninitialized control bytes, and on a table with no actual
2264 /// data but with `Full` control bytes if `self.items == 0`.
2265 ///
2266 /// See also [`RawTableInner::drop_elements`] or [`RawTableInner::free_buckets`]
2267 /// for more information.
2268 ///
2269 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2270 unsafe fn drop_inner_table<T, A: Allocator>(&mut self, alloc: &A, table_layout: TableLayout) {
2271 if !self.is_empty_singleton() {
2272 // SAFETY: The caller must uphold the safety contract for `drop_inner_table` method.
2273 unsafe {
2274 self.drop_elements::<T>();
2275 }
2276 // SAFETY:
2277 // 1. We have checked that our table is allocated.
2278 // 2. The caller must uphold the safety contract for `drop_inner_table` method.
2279 unsafe {
2280 self.free_buckets(alloc, table_layout);
2281 }
2282 }
2283 }
2284
2285 /// Returns a pointer to an element in the table (convenience for
2286 /// `Bucket::from_base_index(self.data_end::<T>(), index)`).
2287 ///
2288 /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
2289 /// otherwise using it may result in [`undefined behavior`].
2290 ///
2291 /// # Safety
2292 ///
2293 /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
2294 /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
2295 /// this function, the following safety rules must be observed:
2296 ///
2297 /// * The table must already be allocated;
2298 ///
2299 /// * The `index` must not be greater than the number returned by the [`RawTableInner::num_buckets`]
2300 /// function, i.e. `(index + 1) <= self.num_buckets()`.
2301 ///
2302 /// * The type `T` must be the actual type of the elements stored in the table, otherwise
2303 /// using the returned [`Bucket`] may result in [`undefined behavior`].
2304 ///
2305 /// It is safe to call this function with index of zero (`index == 0`) on a table that has
2306 /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
2307 ///
2308 /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
2309 /// not be greater than the number returned by the [`RawTable::num_buckets`] function, i.e.
2310 /// `(index + 1) <= self.num_buckets()`.
2311 ///
2312 /// ```none
2313 /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
    /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2315 /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::num_buckets() - 1"):
2316 ///
2317 /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
2318 /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
2319 /// |
2320 /// | `base = table.data_end::<T>()` points here
2321 /// | (to the start of CT0 or to the end of T0)
2322 /// v v
2323 /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2324 /// ^ \__________ __________/
2325 /// `table.bucket(3)` returns a pointer that points \/
2326 /// here in the `data` part of the `RawTableInner` additional control bytes
2327 /// (to the end of T3) `m = Group::WIDTH - 1`
2328 ///
2329 /// where: T0...T_n - our stored data;
2330 /// CT0...CT_n - control bytes or metadata for `data`;
2331 /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2332 /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2333 /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2334 ///
2335 /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number
2336 /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
2337 /// ```
2338 ///
2339 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn bucket<T>(&self, index: usize) -> Bucket<T> {
        debug_assert_ne!(self.bucket_mask, 0);
        debug_assert!(index < self.num_buckets());
        // SAFETY: The caller guarantees that the table is allocated and that
        // `index` is in range (see the doc comment above), which is exactly
        // the contract required by `Bucket::from_base_index`.
        unsafe { Bucket::from_base_index(self.data_end(), index) }
    }
2346
2347 /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
2348 /// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
2349 ///
2350 /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
2351 /// otherwise using it may result in [`undefined behavior`].
2352 ///
2353 /// # Safety
2354 ///
2355 /// If any of the following conditions are violated, the result is [`undefined behavior`]:
2356 ///
2357 /// * The table must already be allocated;
2358 ///
2359 /// * The `index` must not be greater than the number returned by the [`RawTableInner::num_buckets`]
2360 /// function, i.e. `(index + 1) <= self.num_buckets()`;
2361 ///
2362 /// * The `size_of` must be equal to the size of the elements stored in the table;
2363 ///
2364 /// ```none
2365 /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
    /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2367 /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::num_buckets() - 1"):
2368 ///
2369 /// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
2370 /// `data` part of the `RawTableInner`, i.e. to the start of T3
2371 /// |
2372 /// | `base = table.data_end::<u8>()` points here
2373 /// | (to the start of CT0 or to the end of T0)
2374 /// v v
2375 /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2376 /// \__________ __________/
2377 /// \/
2378 /// additional control bytes
2379 /// `m = Group::WIDTH - 1`
2380 ///
2381 /// where: T0...T_n - our stored data;
2382 /// CT0...CT_n - control bytes or metadata for `data`;
2383 /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2384 /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2385 /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2386 ///
2387 /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number
2388 /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
2389 /// ```
2390 ///
2391 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
        debug_assert_ne!(self.bucket_mask, 0);
        debug_assert!(index < self.num_buckets());
        unsafe {
            // Elements are laid out in reverse order before the control
            // bytes (see the diagram in the doc comment above), so element
            // `index` starts `(index + 1) * size_of` bytes before `data_end`.
            let base: *mut u8 = self.data_end().as_ptr();
            base.sub((index + 1) * size_of)
        }
    }
2401
2402 /// Returns pointer to one past last `data` element in the table as viewed from
2403 /// the start point of the allocation (convenience for `self.ctrl.cast()`).
2404 ///
2405 /// This function actually returns a pointer to the end of the `data element` at
2406 /// index "0" (zero).
2407 ///
2408 /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
2409 /// otherwise using it may result in [`undefined behavior`].
2410 ///
2411 /// # Note
2412 ///
2413 /// The type `T` must be the actual type of the elements stored in the table, otherwise
2414 /// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
2415 ///
2416 /// ```none
2417 /// `table.data_end::<T>()` returns pointer that points here
2418 /// (to the end of `T0`)
2419 /// ∨
2420 /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
2421 /// \________ ________/
2422 /// \/
2423 /// `n = buckets - 1`, i.e. `RawTableInner::num_buckets() - 1`
2424 ///
2425 /// where: T0...T_n - our stored data;
2426 /// CT0...CT_n - control bytes or metadata for `data`.
2427 /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2428 /// with loading `Group` bytes from the heap works properly, even if the result
2429 /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2430 /// `RawTableInner::set_ctrl` function.
2431 ///
2432 /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.num_buckets()` because the number
2433 /// of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
2434 /// ```
2435 ///
2436 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    fn data_end<T>(&self) -> NonNull<T> {
        // `self.ctrl` points at the first control byte, which immediately
        // follows the data part, i.e. one past the end of element T0.
        self.ctrl.cast()
    }
2441
2442 /// Returns an iterator-like object for a probe sequence on the table.
2443 ///
2444 /// This iterator never terminates, but is guaranteed to visit each bucket
2445 /// group exactly once. The loop using `probe_seq` must terminate upon
2446 /// reaching a group containing an empty bucket.
2447 #[inline]
2448 fn probe_seq(&self, hash: u64) -> ProbeSeq {
2449 ProbeSeq {
2450 // This is the same as `hash as usize % self.num_buckets()` because the number
2451 // of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
2452 pos: h1(hash) & self.bucket_mask,
2453 stride: 0,
2454 }
2455 }
2456
    /// Marks the bucket at `index` as full with `new_ctrl` and updates the
    /// `items` / `growth_left` bookkeeping for a fresh insertion.
    ///
    /// `old_ctrl` is the control byte that was previously stored at `index`;
    /// `growth_left` is only decremented when the replaced slot was empty
    /// (per `special_is_empty`), since reusing a non-empty special slot does
    /// not consume additional growth budget.
    ///
    /// # Safety
    ///
    /// Same contract as [`RawTableInner::set_ctrl`]: the table must already
    /// be allocated and `index` must not be greater than `self.bucket_mask`.
    #[inline]
    unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: Tag, new_ctrl: Tag) {
        self.growth_left -= usize::from(old_ctrl.special_is_empty());
        unsafe {
            self.set_ctrl(index, new_ctrl);
        }
        self.items += 1;
    }
2465
    /// Checks whether buckets `i` and `new_i` fall into the same (possibly
    /// unaligned) probe group for the given `hash`.
    ///
    /// Used by [`RawTableInner::rehash_in_place`]: if an element would only
    /// move within its own probe group, there is no benefit in relocating it.
    #[inline]
    fn is_in_same_group(&self, i: usize, new_i: usize, hash: u64) -> bool {
        let probe_seq_pos = self.probe_seq(hash).pos;
        // Number of whole groups between the probe-sequence start and `pos`.
        // The subtraction wraps and is masked because probing wraps around
        // the power-of-two sized table.
        let probe_index =
            |pos: usize| (pos.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH;
        probe_index(i) == probe_index(new_i)
    }
2473
2474 /// Sets a control byte to the hash, and possibly also the replicated control byte at
2475 /// the end of the array.
2476 ///
2477 /// This function does not make any changes to the `data` parts of the table,
2478 /// or any changes to the `items` or `growth_left` field of the table.
2479 ///
2480 /// # Safety
2481 ///
2482 /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl`]
2483 /// method. Thus, in order to uphold the safety contracts for the method, you must observe the
2484 /// following rules when calling this function:
2485 ///
2486 /// * The [`RawTableInner`] has already been allocated;
2487 ///
2488 /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
2489 /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
2490 /// be no greater than the number returned by the function [`RawTableInner::num_buckets`].
2491 ///
2492 /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
2493 ///
    /// See also [`Bucket::as_ptr`] method, for more information about properly removing
2495 /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
2496 ///
2497 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2498 #[inline]
2499 unsafe fn set_ctrl_hash(&mut self, index: usize, hash: u64) {
2500 unsafe {
2501 // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl_hash`]
2502 self.set_ctrl(index, Tag::full(hash));
2503 }
2504 }
2505
2506 /// Replaces the hash in the control byte at the given index with the provided one,
2507 /// and possibly also replicates the new control byte at the end of the array of control
2508 /// bytes, returning the old control byte.
2509 ///
2510 /// This function does not make any changes to the `data` parts of the table,
2511 /// or any changes to the `items` or `growth_left` field of the table.
2512 ///
2513 /// # Safety
2514 ///
2515 /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl_hash`]
2516 /// and [`RawTableInner::ctrl`] methods. Thus, in order to uphold the safety contracts for both
2517 /// methods, you must observe the following rules when calling this function:
2518 ///
2519 /// * The [`RawTableInner`] has already been allocated;
2520 ///
2521 /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
2522 /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
2523 /// be no greater than the number returned by the function [`RawTableInner::num_buckets`].
2524 ///
2525 /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
2526 ///
    /// See also [`Bucket::as_ptr`] method, for more information about properly removing
2528 /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
2529 ///
2530 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
2531 #[inline]
2532 unsafe fn replace_ctrl_hash(&mut self, index: usize, hash: u64) -> Tag {
2533 unsafe {
2534 // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::replace_ctrl_hash`]
2535 let prev_ctrl = *self.ctrl(index);
2536 self.set_ctrl_hash(index, hash);
2537 prev_ctrl
2538 }
2539 }
2540
2541 /// Sets a control byte, and possibly also the replicated control byte at
2542 /// the end of the array.
2543 ///
2544 /// This function does not make any changes to the `data` parts of the table,
2545 /// or any changes to the `items` or `growth_left` field of the table.
2546 ///
2547 /// # Safety
2548 ///
2549 /// You must observe the following safety rules when calling this function:
2550 ///
2551 /// * The [`RawTableInner`] has already been allocated;
2552 ///
2553 /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
2554 /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
2555 /// be no greater than the number returned by the function [`RawTableInner::num_buckets`].
2556 ///
2557 /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
2558 ///
    /// See also [`Bucket::as_ptr`] method, for more information about properly removing
2560 /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
2561 ///
2562 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn set_ctrl(&mut self, index: usize, ctrl: Tag) {
        // Replicate the first Group::WIDTH control bytes at the end of
        // the array without using a branch. If the table is smaller than
        // the group width (self.num_buckets() < Group::WIDTH),
        // `index2 = Group::WIDTH + index`, otherwise `index2` is:
        //
        // - If index >= Group::WIDTH then index == index2.
        // - Otherwise index2 == self.bucket_mask + 1 + index.
        //
        // The very last replicated control byte is never actually read because
        // we mask the initial index for unaligned loads, but we write it
        // anyway because it makes the set_ctrl implementation simpler.
        //
        // If there are fewer buckets than Group::WIDTH then this code will
        // replicate the buckets at the end of the trailing group. For example
        // with 2 buckets and a group size of 4, the control bytes will look
        // like this:
        //
        //     Real      |                 Replicated
        // -------------------------------------------------------
        // | [A] | [B] | [Tag::EMPTY] | [Tag::EMPTY] | [A] | [B] |
        // -------------------------------------------------------

        // This is the same as `(index.wrapping_sub(Group::WIDTH)) % self.num_buckets() + Group::WIDTH`
        // because the number of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
        let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH;

        // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl`]
        unsafe {
            *self.ctrl(index) = ctrl;
            *self.ctrl(index2) = ctrl;
        }
    }
2597
2598 /// Returns a pointer to a control byte.
2599 ///
2600 /// # Safety
2601 ///
2602 /// For the allocated [`RawTableInner`], the result is [`Undefined Behavior`],
2603 /// if the `index` is greater than the `self.bucket_mask + 1 + Group::WIDTH`.
2604 /// In that case, calling this function with `index == self.bucket_mask + 1 + Group::WIDTH`
2605 /// will return a pointer to the end of the allocated table and it is useless on its own.
2606 ///
2607 /// Calling this function with `index >= self.bucket_mask + 1 + Group::WIDTH` on a
2608 /// table that has not been allocated results in [`Undefined Behavior`].
2609 ///
2610 /// So to satisfy both requirements you should always follow the rule that
2611 /// `index < self.bucket_mask + 1 + Group::WIDTH`
2612 ///
2613 /// Calling this function on [`RawTableInner`] that are not already allocated is safe
2614 /// for read-only purpose.
2615 ///
    /// See also [`Bucket::as_ptr()`] method, for more information about properly removing
2617 /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
2618 ///
2619 /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn ctrl(&self, index: usize) -> *mut Tag {
        debug_assert!(index < self.num_ctrl_bytes());
        // SAFETY: The caller upholds the contract of [`RawTableInner::ctrl`]:
        // `index` does not exceed `self.bucket_mask + 1 + Group::WIDTH`, so
        // the offset pointer stays within (or one past the end of) the
        // control-byte array.
        unsafe { self.ctrl.as_ptr().add(index).cast() }
    }
2626
2627 /// Gets the slice of all control bytes, as possibily uninitialized tags.
2628 fn ctrl_slice(&mut self) -> &mut [mem::MaybeUninit<Tag>] {
2629 // SAFETY: We have the correct number of control bytes.
2630 unsafe { slice::from_raw_parts_mut(self.ctrl.as_ptr().cast(), self.num_ctrl_bytes()) }
2631 }
2632
    /// Returns the total number of buckets in the table.
    ///
    /// This is always a power of two, since `self.bucket_mask` is
    /// `num_buckets - 1` (it is `1` for the non-allocated empty singleton,
    /// where `bucket_mask == 0`).
    #[inline]
    fn num_buckets(&self) -> usize {
        self.bucket_mask + 1
    }
2637
2638 /// Checks whether the bucket at `index` is full.
2639 ///
2640 /// # Safety
2641 ///
2642 /// The caller must ensure `index` is less than the number of buckets.
2643 #[inline]
2644 unsafe fn is_bucket_full(&self, index: usize) -> bool {
2645 debug_assert!(index < self.num_buckets());
2646 unsafe { (*self.ctrl(index)).is_full() }
2647 }
2648
    /// Returns the total number of control bytes in the table, including the
    /// `Group::WIDTH` trailing bytes that replicate the start of the control
    /// array (see `RawTableInner::set_ctrl`).
    #[inline]
    fn num_ctrl_bytes(&self) -> usize {
        self.bucket_mask + 1 + Group::WIDTH
    }
2653
    /// Returns `true` if this is the shared, non-allocated empty table
    /// (a single bucket, `bucket_mask == 0`), i.e. there is no heap
    /// allocation to free.
    #[inline]
    fn is_empty_singleton(&self) -> bool {
        self.bucket_mask == 0
    }
2658
2659 /// Attempts to allocate a new hash table with at least enough capacity
2660 /// for inserting the given number of elements without reallocating,
2661 /// and return it inside `ScopeGuard` to protect against panic in the hash
2662 /// function.
2663 ///
2664 /// # Note
2665 ///
2666 /// It is recommended (but not required):
2667 ///
2668 /// * That the new table's `capacity` be greater than or equal to `self.items`.
2669 ///
2670 /// * The `alloc` is the same [`Allocator`] as the `Allocator` used
2671 /// to allocate this table.
2672 ///
2673 /// * The `table_layout` is the same [`TableLayout`] as the `TableLayout` used
2674 /// to allocate this table.
2675 ///
2676 /// If `table_layout` does not match the `TableLayout` that was used to allocate
2677 /// this table, then using `mem::swap` with the `self` and the new table returned
2678 /// by this function results in [`undefined behavior`].
2679 ///
2680 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    fn prepare_resize<'a, A>(
        &self,
        alloc: &'a A,
        table_layout: TableLayout,
        capacity: usize,
        fallibility: Fallibility,
    ) -> Result<crate::scopeguard::ScopeGuard<Self, impl FnMut(&mut Self) + 'a>, TryReserveError>
    where
        A: Allocator,
    {
        // The new table must be able to hold at least everything currently
        // stored in `self`.
        debug_assert!(self.items <= capacity);

        // Allocate and initialize the new table.
        let new_table =
            RawTableInner::fallible_with_capacity(alloc, table_layout, capacity, fallibility)?;

        // The hash function may panic, in which case we simply free the new
        // table without dropping any elements that may have been copied into
        // it.
        //
        // This guard is also used to free the old table on success: after
        // `mem::swap` in the caller, the guard holds the old allocation.
        Ok(guard(new_table, move |self_| {
            if !self_.is_empty_singleton() {
                // SAFETY:
                // 1. We have checked that our table is allocated.
                // 2. We know for sure that the `alloc` and `table_layout` matches the
                //    [`Allocator`] and [`TableLayout`] used to allocate this table.
                unsafe { self_.free_buckets(alloc, table_layout) };
            }
        }))
    }
2714
2715 /// Reserves or rehashes to make room for `additional` more elements.
2716 ///
2717 /// This uses dynamic dispatch to reduce the amount of
2718 /// code generated, but it is eliminated by LLVM optimizations when inlined.
2719 ///
2720 /// # Safety
2721 ///
2722 /// If any of the following conditions are violated, the result is
2723 /// [`undefined behavior`]:
2724 ///
2725 /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used
2726 /// to allocate this table.
2727 ///
2728 /// * The `layout` must be the same [`TableLayout`] as the `TableLayout`
2729 /// used to allocate this table.
2730 ///
2731 /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of
2732 /// the elements stored in the table.
2733 ///
2734 /// * The [`RawTableInner`] must have properly initialized control bytes.
2735 ///
2736 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[expect(clippy::inline_always)]
    #[inline(always)]
    unsafe fn reserve_rehash_inner<A>(
        &mut self,
        alloc: &A,
        additional: usize,
        hasher: &dyn Fn(&mut Self, usize) -> u64,
        fallibility: Fallibility,
        layout: TableLayout,
        drop: Option<unsafe fn(*mut u8)>,
    ) -> Result<(), TryReserveError>
    where
        A: Allocator,
    {
        // Avoid `Option::ok_or_else` because it bloats LLVM IR.
        let Some(new_items) = self.items.checked_add(additional) else {
            return Err(fallibility.capacity_overflow());
        };
        let full_capacity = bucket_mask_to_capacity(self.bucket_mask);
        // Heuristic: if the requested total fits in half of the full
        // capacity, any space shortage must come from DELETED entries, so an
        // in-place rehash suffices and no reallocation is needed.
        if new_items <= full_capacity / 2 {
            // Rehash in-place without re-allocating if we have plenty of spare
            // capacity that is locked up due to DELETED entries.

            // SAFETY:
            // 1. We know for sure that `[`RawTableInner`]` has already been allocated
            //    (since new_items <= full_capacity / 2);
            // 2. The caller ensures that `drop` function is the actual drop function of
            //    the elements stored in the table.
            // 3. The caller ensures that `layout` matches the [`TableLayout`] that was
            //    used to allocate this table.
            // 4. The caller ensures that the control bytes of the `RawTableInner`
            //    are already initialized.
            unsafe {
                self.rehash_in_place(hasher, layout.size, drop);
            }
            Ok(())
        } else {
            // Otherwise, conservatively resize to at least the next size up
            // to avoid churning deletes into frequent rehashes.
            //
            // SAFETY:
            // 1. We know for sure that `capacity >= self.items`.
            // 2. The caller ensures that `alloc` and `layout` matches the [`Allocator`] and
            //    [`TableLayout`] that were used to allocate this table.
            // 3. The caller ensures that the control bytes of the `RawTableInner`
            //    are already initialized.
            unsafe {
                self.resize_inner(
                    alloc,
                    usize::max(new_items, full_capacity + 1),
                    hasher,
                    fallibility,
                    layout,
                )
            }
        }
    }
2794
2795 /// Returns an iterator over full buckets indices in the table.
2796 ///
2797 /// # Safety
2798 ///
2799 /// Behavior is undefined if any of the following conditions are violated:
2800 ///
2801 /// * The caller has to ensure that the `RawTableInner` outlives the
2802 /// `FullBucketsIndices`. Because we cannot make the `next` method
2803 /// unsafe on the `FullBucketsIndices` struct, we have to make the
2804 /// `full_buckets_indices` method unsafe.
2805 ///
2806 /// * The [`RawTableInner`] must have properly initialized control bytes.
    #[inline(always)]
    unsafe fn full_buckets_indices(&self) -> FullBucketsIndices {
        // SAFETY:
        // 1. Since the caller of this function ensures that the control bytes
        //    are properly initialized and `self.ctrl(0)` points to the start
        //    of the array of control bytes, therefore: `ctrl` is valid for reads,
        //    properly aligned to `Group::WIDTH` and points to the properly initialized
        //    control bytes.
        // 2. The value of `items` is equal to the amount of data (values) added
        //    to the table.
        //
        //                         `ctrl` points here (to the start
        //                         of the first control byte `CT0`)
        //                          ∨
        // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
        //                           \________  ________/
        //                                    \/
        //       `n = buckets - 1`, i.e. `RawTableInner::num_buckets() - 1`
        //
        // where: T0...T_n  - our stored data;
        //        CT0...CT_n - control bytes or metadata for `data`.
        unsafe {
            let ctrl = NonNull::new_unchecked(self.ctrl(0).cast::<u8>());

            FullBucketsIndices {
                // Load the first group
                // SAFETY: See explanation above.
                current_group: Group::load_aligned(ctrl.as_ptr().cast())
                    .match_full()
                    .into_iter(),
                group_first_index: 0,
                ctrl,
                // See point 2 above: the number of full buckets the iterator
                // has left to yield.
                items: self.items,
            }
        }
    }
2843
2844 /// Allocates a new table of a different size and moves the contents of the
2845 /// current table into it.
2846 ///
2847 /// This uses dynamic dispatch to reduce the amount of
2848 /// code generated, but it is eliminated by LLVM optimizations when inlined.
2849 ///
2850 /// # Safety
2851 ///
2852 /// If any of the following conditions are violated, the result is
2853 /// [`undefined behavior`]:
2854 ///
2855 /// * The `alloc` must be the same [`Allocator`] as the `Allocator` used
2856 /// to allocate this table;
2857 ///
2858 /// * The `layout` must be the same [`TableLayout`] as the `TableLayout`
2859 /// used to allocate this table;
2860 ///
2861 /// * The [`RawTableInner`] must have properly initialized control bytes.
2862 ///
2863 /// The caller of this function must ensure that `capacity >= self.items`
2864 /// otherwise:
2865 ///
2866 /// * If `self.items != 0`, calling of this function with `capacity == 0`
2867 /// results in [`undefined behavior`].
2868 ///
2869 /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
2870 /// `self.items > capacity_to_buckets(capacity)` calling this function
2871 /// results in [`undefined behavior`].
2872 ///
2873 /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
2874 /// `self.items > capacity_to_buckets(capacity)` calling this function
    ///   never returns (it will go into an infinite loop).
2876 ///
2877 /// Note: It is recommended (but not required) that the new table's `capacity`
2878 /// be greater than or equal to `self.items`. In case if `capacity <= self.items`
2879 /// this function can never return. See [`RawTableInner::find_insert_index`] for
2880 /// more information.
2881 ///
2882 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[expect(clippy::inline_always)]
    #[inline(always)]
    unsafe fn resize_inner<A>(
        &mut self,
        alloc: &A,
        capacity: usize,
        hasher: &dyn Fn(&mut Self, usize) -> u64,
        fallibility: Fallibility,
        layout: TableLayout,
    ) -> Result<(), TryReserveError>
    where
        A: Allocator,
    {
        // SAFETY: We know for sure that `alloc` and `layout` matches the [`Allocator`] and [`TableLayout`]
        // that were used to allocate this table.
        let mut new_table = self.prepare_resize(alloc, layout, capacity, fallibility)?;

        // SAFETY: We know for sure that RawTableInner will outlive the
        // returned `FullBucketsIndices` iterator, and the caller of this
        // function ensures that the control bytes are properly initialized.
        unsafe {
            for full_byte_index in self.full_buckets_indices() {
                // This may panic.
                let hash = hasher(self, full_byte_index);

                // SAFETY:
                // We can use a simpler version of insert() here since:
                // 1. There are no DELETED entries.
                // 2. We know there is enough space in the table.
                // 3. All elements are unique.
                // 4. The caller of this function guarantees that `capacity > 0`
                //    so `new_table` must already have some allocated memory.
                // 5. We set `growth_left` and `items` fields of the new table
                //    after the loop.
                // 6. We insert into the table, at the returned index, the data
                //    matching the given hash immediately after calling this function.
                let (new_index, _) = new_table.prepare_insert_index(hash);

                // SAFETY:
                //
                // * `src` is valid for reads of `layout.size` bytes, since the
                //   table is alive and the `full_byte_index` is guaranteed to be
                //   within bounds (see `FullBucketsIndices::next_impl`);
                //
                // * `dst` is valid for writes of `layout.size` bytes, since the
                //   caller ensures that `table_layout` matches the [`TableLayout`]
                //   that was used to allocate old table and we have the `new_index`
                //   returned by `prepare_insert_index`.
                //
                // * Both `src` and `dst` are properly aligned.
                //
                // * Both `src` and `dst` point to different region of memory.
                ptr::copy_nonoverlapping(
                    self.bucket_ptr(full_byte_index, layout.size),
                    new_table.bucket_ptr(new_index, layout.size),
                    layout.size,
                );
            }
        }

        // The hash function didn't panic, so we can safely set the
        // `growth_left` and `items` fields of the new table.
        new_table.growth_left -= self.items;
        new_table.items = self.items;

        // We successfully copied all elements without panicking. Now replace
        // self with the new table. The old table will have its memory freed but
        // the items will not be dropped (since they have been moved into the
        // new table).
        // SAFETY: The caller ensures that `table_layout` matches the [`TableLayout`]
        // that was used to allocate this table.
        mem::swap(self, &mut new_table);

        // After the swap, `new_table` holds the old allocation; the scope
        // guard created by `prepare_resize` frees its buckets when dropped at
        // the end of this function, without dropping the moved-out elements.
        Ok(())
    }
2958
2959 /// Rehashes the contents of the table in place (i.e. without changing the
2960 /// allocation).
2961 ///
    /// If `hasher` panics then some of the table's contents may be lost.
2963 ///
2964 /// This uses dynamic dispatch to reduce the amount of
2965 /// code generated, but it is eliminated by LLVM optimizations when inlined.
2966 ///
2967 /// # Safety
2968 ///
2969 /// If any of the following conditions are violated, the result is [`undefined behavior`]:
2970 ///
2971 /// * The `size_of` must be equal to the size of the elements stored in the table;
2972 ///
2973 /// * The `drop` function (`fn(*mut u8)`) must be the actual drop function of
2974 /// the elements stored in the table.
2975 ///
2976 /// * The [`RawTableInner`] has already been allocated;
2977 ///
2978 /// * The [`RawTableInner`] must have properly initialized control bytes.
2979 ///
2980 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[cfg_attr(feature = "inline-more", expect(clippy::inline_always))]
    #[cfg_attr(feature = "inline-more", inline(always))]
    #[cfg_attr(not(feature = "inline-more"), inline)]
    unsafe fn rehash_in_place(
        &mut self,
        hasher: &dyn Fn(&mut Self, usize) -> u64,
        size_of: usize,
        drop: Option<unsafe fn(*mut u8)>,
    ) {
        // If the hash function panics then properly clean up any elements
        // that we haven't rehashed yet. We unfortunately can't preserve the
        // element since we lost their hash and have no way of recovering it
        // without risking another panic.
        unsafe {
            self.prepare_rehash_in_place();
        }

        let mut guard = guard(self, move |self_| {
            for i in 0..self_.num_buckets() {
                unsafe {
                    // Any elements that haven't been rehashed yet have a
                    // DELETED tag. These need to be dropped and have their tag
                    // reset to EMPTY.
                    if *self_.ctrl(i) == Tag::DELETED {
                        self_.set_ctrl(i, Tag::EMPTY);
                        if let Some(drop) = drop {
                            drop(self_.bucket_ptr(i, size_of));
                        }
                        self_.items -= 1;
                    }
                }
            }
            // Account for the dropped elements when restoring `growth_left`.
            self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items;
        });

        // At this point, DELETED elements are elements that we haven't
        // rehashed yet. Find them and re-insert them at their ideal
        // position.
        'outer: for i in 0..guard.num_buckets() {
            unsafe {
                if *guard.ctrl(i) != Tag::DELETED {
                    continue;
                }
            }

            let i_p = unsafe { guard.bucket_ptr(i, size_of) };

            loop {
                // Hash the current item
                let hash = hasher(*guard, i);

                // Search for a suitable place to put it
                //
                // SAFETY: Caller of this function ensures that the control bytes
                // are properly initialized.
                let new_i = unsafe { guard.find_insert_index(hash) };

                // Probing works by scanning through all of the control
                // bytes in groups, which may not be aligned to the group
                // size. If both the new and old position fall within the
                // same unaligned group, then there is no benefit in moving
                // it and we can just continue to the next item.
                if likely(guard.is_in_same_group(i, new_i, hash)) {
                    unsafe { guard.set_ctrl_hash(i, hash) };
                    continue 'outer;
                }

                let new_i_p = unsafe { guard.bucket_ptr(new_i, size_of) };

                // We are moving the current item to a new position. Write
                // our H2 to the control byte of the new position.
                let prev_ctrl = unsafe { guard.replace_ctrl_hash(new_i, hash) };
                if prev_ctrl == Tag::EMPTY {
                    unsafe { guard.set_ctrl(i, Tag::EMPTY) };
                    // If the target slot is empty, simply move the current
                    // element into the new slot and clear the old control
                    // byte.
                    unsafe {
                        ptr::copy_nonoverlapping(i_p, new_i_p, size_of);
                    }
                    continue 'outer;
                }

                // If the target slot is occupied, swap the two elements
                // and then continue processing the element that we just
                // swapped into the old slot.
                debug_assert_eq!(prev_ctrl, Tag::DELETED);
                unsafe {
                    ptr::swap_nonoverlapping(i_p, new_i_p, size_of);
                }
            }
        }

        // Every element was rehashed and kept, so recompute `growth_left`
        // for the now tombstone-free table.
        guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items;

        // Success: defuse the panic-cleanup guard without running it.
        mem::forget(guard);
    }
3078
3079 /// Deallocates the table without dropping any entries.
3080 ///
3081 /// # Note
3082 ///
3083 /// This function must be called only after [`drop_elements`](RawTableInner::drop_elements),
3084 /// else it can lead to leaking of memory. Also calling this function automatically
3085 /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
3086 /// (dangling) the `ctrl` field of the table.
3087 ///
3088 /// # Safety
3089 ///
3090 /// If any of the following conditions are violated, the result is [`Undefined Behavior`]:
3091 ///
3092 /// * The [`RawTableInner`] has already been allocated;
3093 ///
3094 /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
3095 /// to allocate this table.
3096 ///
3097 /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout` that was used
3098 /// to allocate this table.
3099 ///
3100 /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information.
3101 ///
3102 /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
3103 /// [`GlobalAlloc::dealloc`]: stdalloc::alloc::GlobalAlloc::dealloc
3104 /// [`Allocator::deallocate`]: stdalloc::alloc::Allocator::deallocate
    #[inline]
    unsafe fn free_buckets<A>(&mut self, alloc: &A, table_layout: TableLayout)
    where
        A: Allocator,
    {
        unsafe {
            // SAFETY: The caller must uphold the safety contract for `free_buckets`
            // method.
            let (ptr, layout) = self.allocation_info(table_layout);
            // After this call `self.ctrl` (and every `Bucket` derived from this
            // table) is dangling; see the `# Note` section above.
            alloc.deallocate(ptr, layout);
        }
    }
3117
3118 /// Returns a pointer to the allocated memory and the layout that was used to
3119 /// allocate the table.
3120 ///
3121 /// # Safety
3122 ///
3123 /// Caller of this function must observe the following safety rules:
3124 ///
3125 /// * The [`RawTableInner`] has already been allocated, otherwise
3126 /// calling this function results in [`undefined behavior`]
3127 ///
3128 /// * The `table_layout` must be the same [`TableLayout`] as the `TableLayout`
3129 /// that was used to allocate this table. Failure to comply with this condition
3130 /// may result in [`undefined behavior`].
3131 ///
3132 /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information.
3133 ///
3134 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
3135 /// [`GlobalAlloc::dealloc`]: stdalloc::GlobalAlloc::dealloc
3136 /// [`Allocator::deallocate`]: stdalloc::Allocator::deallocate
    #[inline]
    unsafe fn allocation_info(&self, table_layout: TableLayout) -> (NonNull<u8>, Layout) {
        debug_assert!(
            !self.is_empty_singleton(),
            "this function can only be called on non-empty tables"
        );

        // Recompute the layout that was used for the allocation. The caller
        // guarantees `table_layout` matches the one used at allocation time
        // (see `# Safety` above), so this computation succeeded once already
        // and cannot return `None` here.
        let (layout, ctrl_offset) = {
            let option = table_layout.calculate_layout_for(self.num_buckets());
            unsafe { option.unwrap_unchecked() }
        };
        (
            // SAFETY: The caller must uphold the safety contract for `allocation_info` method.
            // `ctrl` points `ctrl_offset` bytes past the start of the allocation,
            // so subtracting `ctrl_offset` recovers the allocation's base pointer.
            unsafe { NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)) },
            layout,
        )
    }
3154
3155 /// Returns the total amount of memory allocated internally by the hash
3156 /// table, in bytes.
3157 ///
3158 /// The returned number is informational only. It is intended to be
3159 /// primarily used for memory profiling.
3160 ///
3161 /// # Safety
3162 ///
3163 /// The `table_layout` must be the same [`TableLayout`] as the `TableLayout`
3164 /// that was used to allocate this table. Failure to comply with this condition
3165 /// may result in [`undefined behavior`].
3166 ///
3167 ///
3168 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
3169 #[inline]
3170 unsafe fn allocation_size_or_zero(&self, table_layout: TableLayout) -> usize {
3171 if self.is_empty_singleton() {
3172 0
3173 } else {
3174 // SAFETY:
3175 // 1. We have checked that our table is allocated.
3176 // 2. The caller ensures that `table_layout` matches the [`TableLayout`]
3177 // that was used to allocate this table.
3178 unsafe { self.allocation_info(table_layout).1.size() }
3179 }
3180 }
3181
3182 /// Marks all table buckets as empty without dropping their contents.
3183 #[inline]
3184 fn clear_no_drop(&mut self) {
3185 if !self.is_empty_singleton() {
3186 self.ctrl_slice().fill_empty();
3187 }
3188 self.items = 0;
3189 self.growth_left = bucket_mask_to_capacity(self.bucket_mask);
3190 }
3191
3192 /// Erases the [`Bucket`]'s control byte at the given index so that it does not
3193 /// triggered as full, decreases the `items` of the table and, if it can be done,
3194 /// increases `self.growth_left`.
3195 ///
3196 /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it
3197 /// does not make any changes to the `data` parts of the table. The caller of this
3198 /// function must take care to properly drop the `data`, otherwise calling this
3199 /// function may result in a memory leak.
3200 ///
3201 /// # Safety
3202 ///
3203 /// You must observe the following safety rules when calling this function:
3204 ///
3205 /// * The [`RawTableInner`] has already been allocated;
3206 ///
3207 /// * It must be the full control byte at the given position;
3208 ///
3209 /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
3210 /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
3211 /// be no greater than the number returned by the function [`RawTableInner::num_buckets`].
3212 ///
3213 /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
3214 ///
3215 /// Calling this function on a table with no elements is unspecified, but calling subsequent
3216 /// functions is likely to result in [`undefined behavior`] due to overflow subtraction
3217 /// (`self.items -= 1 cause overflow when self.items == 0`).
3218 ///
3219 /// See also [`Bucket::as_ptr`] method, for more information about of properly removing
3220 /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`].
3221 ///
3222 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline]
    unsafe fn erase(&mut self, index: usize) {
        unsafe {
            debug_assert!(self.is_bucket_full(index));
        }

        // This is the same as `index.wrapping_sub(Group::WIDTH) % self.num_buckets()` because
        // the number of buckets is a power of two, and `self.bucket_mask = self.num_buckets() - 1`.
        let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask;
        // SAFETY:
        // - The caller must uphold the safety contract for `erase` method;
        // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask`
        let (empty_before, empty_after) = unsafe {
            (
                Group::load(self.ctrl(index_before)).match_empty(),
                Group::load(self.ctrl(index)).match_empty(),
            )
        };

        // Inserting and searching in the map is performed by two key functions:
        //
        // - The `find_insert_index` function that looks up the index of any `Tag::EMPTY` or `Tag::DELETED`
        //   slot in a group to be able to insert. If it doesn't find an `Tag::EMPTY` or `Tag::DELETED`
        //   slot immediately in the first group, it jumps to the next `Group` looking for it,
        //   and so on until it has gone through all the groups in the control bytes.
        //
        // - The `find_inner` function that looks for the index of the desired element by looking
        //   at all the `FULL` bytes in the group. If it did not find the element right away, and
        //   there is no `Tag::EMPTY` byte in the group, then this means that the `find_insert_index`
        //   function may have found a suitable slot in the next group. Therefore, `find_inner`
        //   jumps further, and if it does not find the desired element and again there is no `Tag::EMPTY`
        //   byte, then it jumps further, and so on. The search stops only if `find_inner` function
        //   finds the desired element or hits an `Tag::EMPTY` slot/byte.
        //
        // Accordingly, this leads to two consequences:
        //
        // - The map must have `Tag::EMPTY` slots (bytes);
        //
        // - You can't just mark the byte to be erased as `Tag::EMPTY`, because otherwise the `find_inner`
        //   function may stumble upon an `Tag::EMPTY` byte before finding the desired element and stop
        //   searching.
        //
        // Thus it is necessary to check all bytes after and before the erased element. If we are in
        // a contiguous `Group` of `FULL` or `Tag::DELETED` bytes (the number of `FULL` or `Tag::DELETED` bytes
        // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as
        // `Tag::DELETED` in order for the `find_inner` function to go further. On the other hand, if there
        // is at least one `Tag::EMPTY` slot in the `Group`, then the `find_inner` function will still stumble
        // upon an `Tag::EMPTY` byte, so we can safely mark our erased byte as `Tag::EMPTY` as well.
        //
        // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index`
        // and given all of the above, tables smaller than the group width (self.num_buckets() < Group::WIDTH)
        // cannot have `Tag::DELETED` bytes.
        //
        // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while
        // `trailing_zeros` refers to the bytes at the beginning of a group.
        let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH {
            Tag::DELETED
        } else {
            // The slot becomes `EMPTY`, so it is immediately reusable and
            // counts toward the remaining growth budget again.
            self.growth_left += 1;
            Tag::EMPTY
        };
        // SAFETY: the caller must uphold the safety contract for `erase` method.
        unsafe {
            self.set_ctrl(index, ctrl);
        }
        self.items -= 1;
    }
3290}
3291
impl<T: Clone, A: Allocator + Clone> Clone for RawTable<T, A> {
    fn clone(&self) -> Self {
        if self.table.is_empty_singleton() {
            // Nothing is allocated; a fresh empty table is an exact clone.
            Self::new_in(self.alloc.clone())
        } else {
            // SAFETY: This is safe as we are taking the size of an already allocated table
            // and therefore capacity overflow cannot occur, `self.table.num_buckets()` is power
            // of two and all allocator errors will be caught inside `RawTableInner::new_uninitialized`.
            let result = unsafe {
                Self::new_uninitialized(
                    self.alloc.clone(),
                    self.table.num_buckets(),
                    Fallibility::Infallible,
                )
            };

            // SAFETY: The result of calling the `new_uninitialized` function cannot be an error
            // because `fallibility == Fallibility::Infallible`.
            let mut new_table = unsafe { result.unwrap_unchecked() };

            // SAFETY:
            // Cloning elements may fail (the clone function may panic). But we don't
            // need to worry about uninitialized control bits, since:
            // 1. The number of items (elements) in the table is zero, which means that
            //    the control bits will not be read by Drop function.
            // 2. The `clone_from_spec` method will first copy all control bits from
            //    `self` (thus initializing them). But this will not affect the `Drop`
            //    function, since the `clone_from_spec` function sets `items` only after
            //    successfully cloning all elements.
            unsafe { new_table.clone_from_spec(self) };
            new_table
        }
    }

    fn clone_from(&mut self, source: &Self) {
        if source.table.is_empty_singleton() {
            // Cloning from an empty table: drop our own contents and become
            // the empty singleton; keep our allocator.
            let mut old_inner = mem::replace(&mut self.table, RawTableInner::NEW);
            unsafe {
                // SAFETY:
                // 1. We call the function only once;
                // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
                //    and [`TableLayout`] that were used to allocate this table.
                // 3. If any elements' drop function panics, then there will only be a memory leak,
                //    because we have replaced the inner table with a new one.
                old_inner.drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
            }
        } else {
            unsafe {
                // Make sure that if any panics occurs, we clear the table and
                // leave it in an empty state.
                let mut self_ = guard(self, |self_| {
                    self_.clear_no_drop();
                });

                // First, drop all our elements without clearing the control
                // bytes. If this panics then the scope guard will clear the
                // table, leaking any elements that were not dropped yet.
                //
                // This leak is unavoidable: we can't try dropping more elements
                // since this could lead to another panic and abort the process.
                //
                // SAFETY: If something goes wrong we clear our table right after
                // dropping the elements, so there is no double drop, since `items`
                // will be equal to zero.
                self_.table.drop_elements::<T>();

                // If necessary, resize our table to match the source.
                if self_.num_buckets() != source.num_buckets() {
                    let new_inner = {
                        let result = RawTableInner::new_uninitialized(
                            &self_.alloc,
                            Self::TABLE_LAYOUT,
                            source.num_buckets(),
                            Fallibility::Infallible,
                        );
                        result.unwrap_unchecked()
                    };
                    // Replace the old inner with new uninitialized one. It's ok, since if something
                    // goes wrong `ScopeGuard` will initialize all control bytes and leave empty table.
                    let mut old_inner = mem::replace(&mut self_.table, new_inner);
                    if !old_inner.is_empty_singleton() {
                        // SAFETY:
                        // 1. We have checked that our table is allocated.
                        // 2. We know for sure that `alloc` and `table_layout` matches
                        //    the [`Allocator`] and [`TableLayout`] that were used to allocate this table.
                        old_inner.free_buckets(&self_.alloc, Self::TABLE_LAYOUT);
                    }
                }

                // Cloning elements may fail (the clone function may panic), but the `ScopeGuard`
                // inside the `clone_from_impl` function will take care of that, dropping all
                // cloned elements if necessary. Our `ScopeGuard` will clear the table.
                self_.clone_from_spec(source);

                // Disarm the scope guard if cloning was successful.
                ScopeGuard::into_inner(self_);
            }
        }
    }
}
3392
/// Specialization of `clone_from` for `Copy` types
trait RawTableClone {
    /// Clones all control bytes and elements from `source` into `self`.
    ///
    /// # Safety
    ///
    /// `self` must have the same number of buckets as `source` and must not
    /// contain any initialized elements (see the preconditions documented on
    /// `clone_from_impl`).
    unsafe fn clone_from_spec(&mut self, source: &Self);
}
// Generic fallback: clone each element individually. On nightly this is a
// `default fn` so the `TrivialClone` specialization below can override it;
// on stable the `default_fn!` macro expands to a plain fn.
impl<T: Clone, A: Allocator + Clone> RawTableClone for RawTable<T, A> {
    default_fn! {
        #[cfg_attr(feature = "inline-more", inline)]
        unsafe fn clone_from_spec(&mut self, source: &Self) {
            unsafe {
                self.clone_from_impl(source);
            }
        }
    }
}
#[cfg(feature = "nightly")]
impl<T: core::clone::TrivialClone, A: Allocator + Clone> RawTableClone for RawTable<T, A> {
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn clone_from_spec(&mut self, source: &Self) {
        // `TrivialClone` lets us clone by bulk-copying: both the control
        // bytes and the bucket data are copied with `memcpy` instead of
        // cloning element by element.
        unsafe {
            source
                .table
                .ctrl(0)
                .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes());
            source
                .data_start()
                .as_ptr()
                .copy_to_nonoverlapping(self.data_start().as_ptr(), self.table.num_buckets());
        }

        // Set the counters only after all data has been copied.
        self.table.items = source.table.items;
        self.table.growth_left = source.table.growth_left;
    }
}
3426
impl<T: Clone, A: Allocator + Clone> RawTable<T, A> {
    /// Common code for `clone` and `clone_from`. Assumes:
    /// - `self.num_buckets() == source.num_buckets()`.
    /// - Any existing elements have been dropped.
    /// - The control bytes are not initialized yet.
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn clone_from_impl(&mut self, source: &Self) {
        // Copy the control bytes unchanged. We do this in a single pass over
        // the whole control-byte array.
        unsafe {
            source
                .table
                .ctrl(0)
                .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes());
        }

        // The cloning of elements may panic, in which case we need
        // to make sure we drop only the elements that have been
        // cloned so far. `index` in the guard state is one past the
        // last bucket whose element was successfully written.
        let mut guard = guard((0, &mut *self), |(index, self_)| {
            if T::NEEDS_DROP {
                for i in 0..*index {
                    unsafe {
                        if self_.is_bucket_full(i) {
                            self_.bucket(i).drop();
                        }
                    }
                }
            }
        });

        unsafe {
            for from in source.iter() {
                let index = source.bucket_index(&from);
                let to = guard.1.bucket(index);
                to.write(from.as_ref().clone());

                // Update the index in case we need to unwind.
                guard.0 = index + 1;
            }
        }

        // Successfully cloned all items, no need to clean up.
        mem::forget(guard);

        // Set the counters only after all elements were cloned, so a panic
        // during cloning never leaves `items` claiming more than was written.
        self.table.items = source.table.items;
        self.table.growth_left = source.table.growth_left;
    }
}
3475
3476impl<T, A: Allocator + Default> Default for RawTable<T, A> {
3477 #[inline]
3478 fn default() -> Self {
3479 Self::new_in(Default::default())
3480 }
3481}
3482
#[cfg(feature = "nightly")]
// `#[may_dangle]` tells the drop checker that `T` values may already be
// dangling when the table is dropped, allowing tables that borrow from
// themselves to compile (the drop glue does not access `T`'s borrows).
unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawTable<T, A> {
    #[cfg_attr(feature = "inline-more", inline)]
    fn drop(&mut self) {
        // SAFETY:
        // 1. We call the function only once;
        // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
        //    and [`TableLayout`] that were used to allocate this table.
        // 3. If the drop function of any elements fails, then only a memory leak will occur,
        //    and we don't care because we are inside the `Drop` function of the `RawTable`,
        //    so there won't be any table left in an inconsistent state.
        unsafe {
            self.table
                .drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
        }
    }
}
#[cfg(not(feature = "nightly"))]
// Stable counterpart of the nightly `#[may_dangle]` impl above; same body.
impl<T, A: Allocator> Drop for RawTable<T, A> {
    #[cfg_attr(feature = "inline-more", inline)]
    fn drop(&mut self) {
        // SAFETY:
        // 1. We call the function only once;
        // 2. We know for sure that `alloc` and `table_layout` matches the [`Allocator`]
        //    and [`TableLayout`] that were used to allocate this table.
        // 3. If the drop function of any elements fails, then only a memory leak will occur,
        //    and we don't care because we are inside the `Drop` function of the `RawTable`,
        //    so there won't be any table left in an inconsistent state.
        unsafe {
            self.table
                .drop_inner_table::<T, _>(&self.alloc, Self::TABLE_LAYOUT);
        }
    }
}
3517
impl<T, A: Allocator> IntoIterator for RawTable<T, A> {
    type Item = T;
    type IntoIter = RawIntoIter<T, A>;

    #[cfg_attr(feature = "inline-more", inline)]
    fn into_iter(self) -> RawIntoIter<T, A> {
        // SAFETY: the iterator is created from `self`, and `self` is then
        // consumed by `into_iter_from`, so the allocation the iterator walks
        // is owned by the returned `RawIntoIter` and outlives it.
        unsafe {
            let iter = self.iter();
            self.into_iter_from(iter)
        }
    }
}
3530
/// Iterator over a sub-range of a table. Unlike `RawIter` this iterator does
/// not track an item count.
pub(crate) struct RawIterRange<T> {
    // Mask of full buckets in the current group. Bits are cleared from this
    // mask as each element is processed.
    current_group: BitMaskIter,

    // Pointer to the buckets for the current group.
    // Kept in sync with `next_ctrl`: it always points `Group::WIDTH` buckets
    // behind the control byte that `next_ctrl` designates.
    data: Bucket<T>,

    // Pointer to the next group of control bytes,
    // Must be aligned to the group size.
    next_ctrl: *const u8,

    // Pointer one past the last control byte of this range.
    end: *const u8,
}
3548
impl<T> RawIterRange<T> {
    /// Returns a `RawIterRange` covering a subset of a table.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is
    /// [`undefined behavior`]:
    ///
    /// * `ctrl` must be valid for reads, i.e. table outlives the `RawIterRange`;
    ///
    /// * `ctrl` must be properly aligned to the group size (`Group::WIDTH`);
    ///
    /// * `ctrl` must point to the array of properly initialized control bytes;
    ///
    /// * `data` must be the [`Bucket`] at the `ctrl` index in the table;
    ///
    /// * the value of `len` must be less than or equal to the number of table buckets,
    ///   and the returned value of `ctrl.as_ptr().add(len).offset_from(ctrl.as_ptr())`
    ///   must be positive.
    ///
    /// * The `ctrl.add(len)` pointer must be either in bounds or one
    ///   byte past the end of the same [allocated table].
    ///
    /// * The `len` must be a power of two.
    ///
    /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new(ctrl: *const u8, data: Bucket<T>, len: usize) -> Self {
        debug_assert_ne!(len, 0);
        debug_assert_eq!(ctrl as usize % Group::WIDTH, 0);
        // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`]
        let end = unsafe { ctrl.add(len) };

        // Load the first group and advance ctrl to point to the next group
        // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`]
        let (current_group, next_ctrl) = unsafe {
            (
                Group::load_aligned(ctrl.cast()).match_full(),
                ctrl.add(Group::WIDTH),
            )
        };

        Self {
            current_group: current_group.into_iter(),
            data,
            next_ctrl,
            end,
        }
    }

    /// Splits a `RawIterRange` into two halves.
    ///
    /// Returns `None` if the remaining range is smaller than or equal to the
    /// group width.
    #[cfg_attr(feature = "inline-more", inline)]
    #[cfg(feature = "rayon")]
    pub(crate) fn split(mut self) -> (Self, Option<RawIterRange<T>>) {
        unsafe {
            if self.end <= self.next_ctrl {
                // Nothing to split if the group that we are currently processing
                // is the last one.
                (self, None)
            } else {
                // len is the remaining number of elements after the group that
                // we are currently processing. It must be a multiple of the
                // group size (small tables are caught by the check above).
                let len = offset_from(self.end, self.next_ctrl);
                debug_assert_eq!(len % Group::WIDTH, 0);

                // Split the remaining elements into two halves, but round the
                // midpoint down in case there is an odd number of groups
                // remaining. This ensures that:
                // - The tail is at least 1 group long.
                // - The split is roughly even considering we still have the
                //   current group to process.
                let mid = (len / 2) & !(Group::WIDTH - 1);

                let tail = Self::new(
                    self.next_ctrl.add(mid),
                    self.data.next_n(Group::WIDTH).next_n(mid),
                    len - mid,
                );
                debug_assert_eq!(
                    self.data.next_n(Group::WIDTH).next_n(mid).ptr,
                    tail.data.ptr
                );
                debug_assert_eq!(self.end, tail.end);
                self.end = self.next_ctrl.add(mid);
                debug_assert_eq!(self.end.add(Group::WIDTH), tail.next_ctrl);
                (self, Some(tail))
            }
        }
    }

    /// # Safety
    /// If `DO_CHECK_PTR_RANGE` is false, caller must ensure that we never try to iterate
    /// after yielding all elements.
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn next_impl<const DO_CHECK_PTR_RANGE: bool>(&mut self) -> Option<Bucket<T>> {
        loop {
            // Yield the next full bucket from the current group, if any.
            if let Some(index) = self.current_group.next() {
                return Some(unsafe { self.data.next_n(index) });
            }

            if DO_CHECK_PTR_RANGE && self.next_ctrl >= self.end {
                return None;
            }

            // We might read past self.end up to the next group boundary,
            // but this is fine because it only occurs on tables smaller
            // than the group size where the trailing control bytes are all
            // EMPTY. On larger tables self.end is guaranteed to be aligned
            // to the group size (since tables are power-of-two sized).
            unsafe {
                self.current_group = Group::load_aligned(self.next_ctrl.cast())
                    .match_full()
                    .into_iter();
                self.data = self.data.next_n(Group::WIDTH);
                self.next_ctrl = self.next_ctrl.add(Group::WIDTH);
            }
        }
    }

    /// Folds every element into an accumulator by applying an operation,
    /// returning the final result.
    ///
    /// `fold_impl()` takes three arguments: the number of items remaining in
    /// the iterator, an initial value, and a closure with two arguments: an
    /// 'accumulator', and an element. The closure returns the value that the
    /// accumulator should have for the next iteration.
    ///
    /// The initial value is the value the accumulator will have on the first call.
    ///
    /// After applying this closure to every element of the iterator, `fold_impl()`
    /// returns the accumulator.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is
    /// [`Undefined Behavior`]:
    ///
    /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved,
    ///   i.e. table outlives the `RawIterRange`;
    ///
    /// * The provided `n` value must match the actual number of items
    ///   in the table.
    ///
    /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[expect(clippy::while_let_on_iterator)]
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn fold_impl<F, B>(mut self, mut n: usize, mut acc: B, mut f: F) -> B
    where
        F: FnMut(B, Bucket<T>) -> B,
    {
        loop {
            while let Some(index) = self.current_group.next() {
                // The returned `index` will always be in the range `0..Group::WIDTH`,
                // so that calling `self.data.next_n(index)` is safe (see detailed explanation below).
                debug_assert!(n != 0);
                let bucket = unsafe { self.data.next_n(index) };
                acc = f(acc, bucket);
                n -= 1;
            }

            if n == 0 {
                return acc;
            }

            // SAFETY: The caller of this function ensures that:
            //
            // 1. The provided `n` value matches the actual number of items in the table;
            // 2. The table is alive and has not moved.
            //
            // Taking the above into account, we always stay within the bounds, because:
            //
            // 1. For tables smaller than the group width (self.num_buckets() <= Group::WIDTH),
            //    we will never end up in the given branch, since we should have already
            //    yielded all the elements of the table.
            //
            // 2. For tables larger than the group width. The number of buckets is a
            //    power of two (2 ^ n), Group::WIDTH is also power of two (2 ^ k). Since
            //    `(2 ^ n) > (2 ^ k)`, than `(2 ^ n) % (2 ^ k) = 0`. As we start from the
            //    start of the array of control bytes, and never try to iterate after
            //    getting all the elements, the last `self.current_group` will read bytes
            //    from the `self.num_buckets() - Group::WIDTH` index. We know also that
            //    `self.current_group.next()` will always return indices within the range
            //    `0..Group::WIDTH`.
            //
            // Knowing all of the above and taking into account that we are synchronizing
            // the `self.data` index with the index we used to read the `self.current_group`,
            // the subsequent `self.data.next_n(index)` will always return a bucket with
            // an index number less than `self.num_buckets()`.
            //
            // The last `self.next_ctrl`, whose index would be `self.num_buckets()`, will never
            // actually be read, since we should have already yielded all the elements of
            // the table.
            unsafe {
                self.current_group = Group::load_aligned(self.next_ctrl.cast())
                    .match_full()
                    .into_iter();
                self.data = self.data.next_n(Group::WIDTH);
                self.next_ctrl = self.next_ctrl.add(Group::WIDTH);
            }
        }
    }
}
3755
// We make raw iterators unconditionally Send and Sync, and let the PhantomData
// in the actual iterator implementations determine the real Send/Sync bounds.
//
// SAFETY: `RawIterRange` only stores raw pointers into the table; whether the
// pointed-to data may cross threads is decided by the wrapping iterator types.
unsafe impl<T> Send for RawIterRange<T> {}
unsafe impl<T> Sync for RawIterRange<T> {}
3760
3761impl<T> Clone for RawIterRange<T> {
3762 #[cfg_attr(feature = "inline-more", inline)]
3763 fn clone(&self) -> Self {
3764 Self {
3765 data: self.data.clone(),
3766 next_ctrl: self.next_ctrl,
3767 current_group: self.current_group.clone(),
3768 end: self.end,
3769 }
3770 }
3771}
3772
impl<T> Iterator for RawIterRange<T> {
    type Item = Bucket<T>;

    #[cfg_attr(feature = "inline-more", inline)]
    fn next(&mut self) -> Option<Bucket<T>> {
        unsafe {
            // SAFETY: We set checker flag to true, so `next_impl` stops at
            // `self.end` instead of relying on an external item count.
            self.next_impl::<true>()
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // We don't have an item count, so just guess based on the range size.
        // The lower bound is 0 because every remaining bucket may be empty.
        let remaining_buckets = if self.end > self.next_ctrl {
            unsafe { offset_from(self.end, self.next_ctrl) }
        } else {
            0
        };

        // Add a group width to include the group we are currently processing.
        (0, Some(Group::WIDTH + remaining_buckets))
    }
}
3797
3798impl<T> FusedIterator for RawIterRange<T> {}
3799
/// Iterator which returns a raw pointer to every full bucket in the table.
///
/// For maximum flexibility this iterator is not bound by a lifetime, but you
/// must observe several rules when using it:
/// - You must not free the hash table while iterating (including via growing/shrinking).
/// - It is fine to erase a bucket that has been yielded by the iterator.
/// - Erasing a bucket that has not yet been yielded by the iterator may still
///   result in the iterator yielding that bucket (unless `reflect_remove` is called).
/// - It is unspecified whether an element inserted after the iterator was
///   created will be yielded by that iterator (unless `reflect_insert` is called).
/// - The order in which the iterator yields bucket is unspecified and may
///   change in the future.
pub(crate) struct RawIter<T> {
    pub(crate) iter: RawIterRange<T>,
    // Number of full buckets not yet yielded. Drives `size_hint` and lets
    // `next` stop without scanning any trailing empty groups.
    items: usize,
}
3816
impl<T> RawIter<T> {
    /// Drops every element still remaining in this iterator.
    ///
    /// # Safety
    ///
    /// The buckets yielded by `self` must contain valid, not-yet-dropped
    /// elements; each one is dropped exactly once here.
    unsafe fn drop_elements(&mut self) {
        unsafe {
            // Skip the walk entirely for types without drop glue, or when
            // nothing remains to be dropped.
            if T::NEEDS_DROP && self.items != 0 {
                for item in self {
                    item.drop();
                }
            }
        }
    }
}
3828
3829impl<T> Clone for RawIter<T> {
3830 #[cfg_attr(feature = "inline-more", inline)]
3831 fn clone(&self) -> Self {
3832 Self {
3833 iter: self.iter.clone(),
3834 items: self.items,
3835 }
3836 }
3837}
impl<T> Default for RawIter<T> {
    /// Returns an iterator over the empty singleton table (yields nothing).
    #[cfg_attr(feature = "inline-more", inline)]
    fn default() -> Self {
        // SAFETY: Because the table is static, it always outlives the iter.
        unsafe { RawTableInner::NEW.iter() }
    }
}
3845
impl<T> Iterator for RawIter<T> {
    type Item = Bucket<T>;

    #[cfg_attr(feature = "inline-more", inline)]
    fn next(&mut self) -> Option<Bucket<T>> {
        // Inner iterator iterates over buckets
        // so it can do unnecessary work if we already yielded all items.
        if self.items == 0 {
            return None;
        }

        let nxt = unsafe {
            // SAFETY: We check number of items to yield using `items` field,
            // so the unchecked (`false`) variant never runs past the range.
            self.iter.next_impl::<false>()
        };

        // `items > 0` guarantees the range still holds a full bucket.
        debug_assert!(nxt.is_some());
        self.items -= 1;

        nxt
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Exact: `items` counts the remaining full buckets.
        (self.items, Some(self.items))
    }

    #[inline]
    fn fold<B, F>(self, init: B, f: F) -> B
    where
        Self: Sized,
        F: FnMut(B, Self::Item) -> B,
    {
        // SAFETY: `self.items` matches the number of remaining elements,
        // as required by `fold_impl`.
        unsafe { self.iter.fold_impl(self.items, init, f) }
    }
}
3882
3883impl<T> ExactSizeIterator for RawIter<T> {}
3884impl<T> FusedIterator for RawIter<T> {}
3885
/// Iterator which returns an index of every full bucket in the table.
///
/// For maximum flexibility this iterator is not bound by a lifetime, but you
/// must observe several rules when using it:
/// - You must not free the hash table while iterating (including via growing/shrinking).
/// - It is fine to erase a bucket that has been yielded by the iterator.
/// - Erasing a bucket that has not yet been yielded by the iterator may still
///   result in the iterator yielding index of that bucket.
/// - It is unspecified whether an element inserted after the iterator was
///   created will be yielded by that iterator.
/// - The order in which the iterator yields indices of the buckets is unspecified
///   and may change in the future.
#[derive(Clone)]
pub(crate) struct FullBucketsIndices {
    // Mask of full buckets in the current group. Bits are cleared from this
    // mask as each element is processed.
    current_group: BitMaskIter,

    // Initial value of the bytes' indices of the current group (relative
    // to the start of the control bytes).
    group_first_index: usize,

    // Pointer to the current group of control bytes,
    // Must be aligned to the group size (Group::WIDTH).
    ctrl: NonNull<u8>,

    // Number of elements in the table that have not yet been yielded.
    items: usize,
}
3915
impl Default for FullBucketsIndices {
    #[cfg_attr(feature = "inline-more", inline)]
    fn default() -> Self {
        // An index iterator over the shared empty table, which yields nothing.
        // SAFETY: Because the table is static, it always outlives the iter.
        unsafe { RawTableInner::NEW.full_buckets_indices() }
    }
}
3923
impl FullBucketsIndices {
    /// Advances the iterator and returns the next value.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is
    /// [`Undefined Behavior`]:
    ///
    /// * The [`RawTableInner`] / [`RawTable`] must be alive and not moved,
    ///   i.e. table outlives the `FullBucketsIndices`;
    ///
    /// * It never tries to iterate after getting all elements.
    ///
    /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
    #[inline(always)]
    unsafe fn next_impl(&mut self) -> Option<usize> {
        loop {
            // Fast path: a full bucket is still pending in the current group.
            if let Some(index) = self.current_group.next() {
                // The returned `self.group_first_index + index` will always
                // be in the range `0..self.num_buckets()`. See explanation below.
                return Some(self.group_first_index + index);
            }

            // SAFETY: The caller of this function ensures that:
            //
            // 1. It never tries to iterate after getting all the elements;
            // 2. The table is alive and has not moved;
            // 3. The first `self.ctrl` pointed to the start of the array of control bytes.
            //
            // Taking the above into account, we always stay within the bounds, because:
            //
            // 1. For tables smaller than the group width (self.num_buckets() <= Group::WIDTH),
            //    we will never end up in the given branch, since we should have already
            //    yielded all the elements of the table.
            //
            // 2. For tables larger than the group width. The number of buckets is a
            //    power of two (2 ^ n), Group::WIDTH is also a power of two (2 ^ k). Since
            //    `(2 ^ n) > (2 ^ k)`, then `(2 ^ n) % (2 ^ k) = 0`. As we start from
            //    the start of the array of control bytes, and never try to iterate after
            //    getting all the elements, the last `self.ctrl` will be equal to
            //    the `self.num_buckets() - Group::WIDTH`, so `self.current_group.next()`
            //    will always contain indices within the range `0..Group::WIDTH`,
            //    and subsequent `self.group_first_index + index` will always return a
            //    number less than `self.num_buckets()`.
            unsafe {
                self.ctrl = NonNull::new_unchecked(self.ctrl.as_ptr().add(Group::WIDTH));
            }

            // SAFETY: See explanation above.
            unsafe {
                self.current_group = Group::load_aligned(self.ctrl.as_ptr().cast())
                    .match_full()
                    .into_iter();
                self.group_first_index += Group::WIDTH;
            }
        }
    }
}
3982
impl Iterator for FullBucketsIndices {
    type Item = usize;

    /// Advances the iterator and returns the next value. It is up to
    /// the caller to ensure that the `RawTable` outlives the `FullBucketsIndices`,
    /// because we cannot make the `next` method unsafe.
    #[inline(always)]
    fn next(&mut self) -> Option<usize> {
        // Return if we already yielded all items.
        if self.items == 0 {
            return None;
        }

        // SAFETY:
        // 1. We check number of items to yield using `items` field.
        // 2. The caller ensures that the table is alive and has not moved.
        let nxt = unsafe { self.next_impl() };

        // `items` promised at least one more full bucket.
        debug_assert!(nxt.is_some());
        self.items -= 1;

        nxt
    }

    #[inline(always)]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `items` is an exact count of the remaining full buckets.
        (self.items, Some(self.items))
    }
}

// `items` is exact, so the size hint is exact, and `next` keeps returning
// `None` after the count reaches zero.
impl ExactSizeIterator for FullBucketsIndices {}
impl FusedIterator for FullBucketsIndices {}
4015
/// Iterator which consumes a table and returns elements.
pub(crate) struct RawIntoIter<T, A: Allocator = Global> {
    // Iterator over the full buckets; elements are moved out via `read`.
    iter: RawIter<T>,
    // The table's backing allocation (pointer, layout, allocator), or `None`
    // if nothing was allocated. Freed on drop, after the remaining elements.
    allocation: Option<(NonNull<u8>, Layout, A)>,
    // Marks that this iterator logically owns values of type `T`.
    marker: PhantomData<T>,
}
4022
impl<T, A: Allocator> RawIntoIter<T, A> {
    /// Returns a borrowing iterator over the elements not yet consumed.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn iter(&self) -> RawIter<T> {
        self.iter.clone()
    }
}
4029
// SAFETY: `RawIntoIter` owns its remaining elements of type `T` and its
// allocator `A`, so it can move to another thread when both are `Send`.
unsafe impl<T, A: Allocator> Send for RawIntoIter<T, A>
where
    T: Send,
    A: Send,
{
}
// SAFETY: shared access only exposes `&T` / `&A`, so `Sync` on both suffices.
unsafe impl<T, A: Allocator> Sync for RawIntoIter<T, A>
where
    T: Sync,
    A: Sync,
{
}
4042
#[cfg(feature = "nightly")]
unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawIntoIter<T, A> {
    #[cfg_attr(feature = "inline-more", inline)]
    fn drop(&mut self) {
        // `#[may_dangle]` (dropck eyepatch) promises the drop checker that
        // this destructor only drops values of type `T` and never otherwise
        // inspects them.
        unsafe {
            // Drop all remaining elements
            self.iter.drop_elements();

            // Free the table
            if let Some((ptr, layout, ref alloc)) = self.allocation {
                alloc.deallocate(ptr, layout);
            }
        }
    }
}
#[cfg(not(feature = "nightly"))]
impl<T, A: Allocator> Drop for RawIntoIter<T, A> {
    #[cfg_attr(feature = "inline-more", inline)]
    fn drop(&mut self) {
        unsafe {
            // Drop all remaining elements that were not yielded.
            self.iter.drop_elements();

            // Free the table's backing memory, if any was allocated.
            if let Some((ptr, layout, ref alloc)) = self.allocation {
                alloc.deallocate(ptr, layout);
            }
        }
    }
}
4073
4074impl<T, A: Allocator> Default for RawIntoIter<T, A> {
4075 fn default() -> Self {
4076 Self {
4077 iter: Default::default(),
4078 allocation: None,
4079 marker: PhantomData,
4080 }
4081 }
4082}
4083impl<T, A: Allocator> Iterator for RawIntoIter<T, A> {
4084 type Item = T;
4085
4086 #[cfg_attr(feature = "inline-more", inline)]
4087 fn next(&mut self) -> Option<T> {
4088 unsafe { Some(self.iter.next()?.read()) }
4089 }
4090
4091 #[inline]
4092 fn size_hint(&self) -> (usize, Option<usize>) {
4093 self.iter.size_hint()
4094 }
4095}
4096
4097impl<T, A: Allocator> ExactSizeIterator for RawIntoIter<T, A> {}
4098impl<T, A: Allocator> FusedIterator for RawIntoIter<T, A> {}
4099
/// Iterator which consumes elements without freeing the table storage.
pub(crate) struct RawDrain<'a, T, A: Allocator = Global> {
    // Iterator over the full buckets being drained.
    iter: RawIter<T>,

    // The table is moved into the iterator for the duration of the drain. This
    // ensures that an empty table is left if the drain iterator is leaked
    // without dropping.
    table: RawTableInner,
    orig_table: NonNull<RawTableInner>,

    // We don't use a &'a mut RawTable<T> because we want RawDrain to be
    // covariant over T.
    marker: PhantomData<&'a RawTable<T, A>>,
}
4114
impl<T, A: Allocator> RawDrain<'_, T, A> {
    /// Returns a borrowing iterator over the elements remaining in the drain.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn iter(&self) -> RawIter<T> {
        self.iter.clone()
    }
}
4121
// SAFETY: `RawDrain` owns the drained elements of type `T` until they are
// yielded (and logically the table/allocator `A`), so sending it requires
// both `T: Send` and `A: Send`.
unsafe impl<T, A: Allocator> Send for RawDrain<'_, T, A>
where
    T: Send,
    A: Send,
{
}
// SAFETY: shared access only exposes the elements behind `&T`.
unsafe impl<T, A: Allocator> Sync for RawDrain<'_, T, A>
where
    T: Sync,
    A: Sync,
{
}
4134
impl<T, A: Allocator> Drop for RawDrain<'_, T, A> {
    #[cfg_attr(feature = "inline-more", inline)]
    fn drop(&mut self) {
        unsafe {
            // Drop all remaining elements. Note that this may panic.
            self.iter.drop_elements();

            // Reset the contents of the table now that all elements have been
            // dropped.
            self.table.clear_no_drop();

            // Move the now empty table back to its original location, from
            // which it was moved out when the drain was constructed.
            self.orig_table
                .as_ptr()
                .copy_from_nonoverlapping(&raw const self.table, 1);
        }
    }
}
4153
4154impl<T, A: Allocator> Iterator for RawDrain<'_, T, A> {
4155 type Item = T;
4156
4157 #[cfg_attr(feature = "inline-more", inline)]
4158 fn next(&mut self) -> Option<T> {
4159 unsafe {
4160 let item = self.iter.next()?;
4161 Some(item.read())
4162 }
4163 }
4164
4165 #[inline]
4166 fn size_hint(&self) -> (usize, Option<usize>) {
4167 self.iter.size_hint()
4168 }
4169}
4170
4171impl<T, A: Allocator> ExactSizeIterator for RawDrain<'_, T, A> {}
4172impl<T, A: Allocator> FusedIterator for RawDrain<'_, T, A> {}
4173
/// Iterator over occupied buckets that could match a given hash.
///
/// `RawTable` only stores 7 bits of the hash value, so this iterator may return
/// items that have a hash value different than the one provided. You should
/// always validate the returned values before using them.
///
/// For maximum flexibility this iterator is not bound by a lifetime, but you
/// must observe several rules when using it:
/// - You must not free the hash table while iterating (including via growing/shrinking).
/// - It is fine to erase a bucket that has been yielded by the iterator.
/// - Erasing a bucket that has not yet been yielded by the iterator may still
///   result in the iterator yielding that bucket.
/// - It is unspecified whether an element inserted after the iterator was
///   created will be yielded by that iterator.
/// - The order in which the iterator yields buckets is unspecified and may
///   change in the future.
pub(crate) struct RawIterHash<T> {
    // Hash-probing state; this wrapper only converts the indices it yields
    // into `Bucket<T>`s.
    inner: RawIterHashIndices,
    _marker: PhantomData<T>,
}
4194
#[derive(Clone)]
pub(crate) struct RawIterHashIndices {
    // See `RawTableInner`'s corresponding fields for details.
    // We can't store a `*const RawTableInner` as it would get
    // invalidated by the user calling `&mut` methods on `RawTable`.
    bucket_mask: usize,
    ctrl: NonNull<u8>,

    // The top 7 bits of the hash.
    tag_hash: Tag,

    // The sequence of groups to probe in the search.
    probe_seq: ProbeSeq,

    // The group of control bytes currently being examined.
    group: Group,

    // The elements within the group with a matching tag-hash.
    bitmask: BitMaskIter,
}
4214
impl<T> RawIterHash<T> {
    /// Creates an iterator over the buckets of `table` whose stored tag
    /// matches the top bits of `hash`.
    ///
    /// # Safety
    ///
    /// The caller must ensure `table` outlives the returned iterator and is
    /// not freed or grown/shrunk while iterating (see the type-level docs).
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new<A: Allocator>(table: &RawTable<T, A>, hash: u64) -> Self {
        RawIterHash {
            inner: unsafe { RawIterHashIndices::new(&table.table, hash) },
            _marker: PhantomData,
        }
    }
}
4224
4225impl<T> Clone for RawIterHash<T> {
4226 #[cfg_attr(feature = "inline-more", inline)]
4227 fn clone(&self) -> Self {
4228 Self {
4229 inner: self.inner.clone(),
4230 _marker: PhantomData,
4231 }
4232 }
4233}
4234
4235impl<T> Default for RawIterHash<T> {
4236 #[cfg_attr(feature = "inline-more", inline)]
4237 fn default() -> Self {
4238 Self {
4239 inner: RawIterHashIndices::default(),
4240 _marker: PhantomData,
4241 }
4242 }
4243}
4244
impl Default for RawIterHashIndices {
    #[cfg_attr(feature = "inline-more", inline)]
    fn default() -> Self {
        // Probing the shared empty table (hash 0) yields no indices.
        // SAFETY: Because the table is static, it always outlives the iter.
        unsafe { RawIterHashIndices::new(&RawTableInner::NEW, 0) }
    }
}
4252
impl RawIterHashIndices {
    /// Begins the probe sequence for `hash`, loading the first group of
    /// control bytes and recording which slots match the hash's tag.
    ///
    /// # Safety
    ///
    /// The caller must ensure `table` outlives the returned iterator and is
    /// not freed or reallocated while iterating.
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new(table: &RawTableInner, hash: u64) -> Self {
        let tag_hash = Tag::full(hash);
        let probe_seq = table.probe_seq(hash);
        // SAFETY: `probe_seq.pos` is derived from the table's bucket mask and
        // therefore addresses valid control bytes.
        let group = unsafe { Group::load(table.ctrl(probe_seq.pos)) };
        let bitmask = group.match_tag(tag_hash).into_iter();

        RawIterHashIndices {
            bucket_mask: table.bucket_mask,
            ctrl: table.ctrl,
            tag_hash,
            probe_seq,
            group,
            bitmask,
        }
    }
}
4271
4272impl<T> Iterator for RawIterHash<T> {
4273 type Item = Bucket<T>;
4274
4275 fn next(&mut self) -> Option<Bucket<T>> {
4276 unsafe {
4277 match self.inner.next() {
4278 Some(index) => {
4279 // Can't use `RawTable::bucket` here as we don't have
4280 // an actual `RawTable` reference to use.
4281 debug_assert!(index <= self.inner.bucket_mask);
4282 let bucket = Bucket::from_base_index(self.inner.ctrl.cast(), index);
4283 Some(bucket)
4284 }
4285 None => None,
4286 }
4287 }
4288 }
4289}
4290
impl Iterator for RawIterHashIndices {
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        unsafe {
            loop {
                // Yield every slot in the current group whose tag matched.
                if let Some(bit) = self.bitmask.next() {
                    let index = (self.probe_seq.pos + bit) & self.bucket_mask;
                    return Some(index);
                }
                // An empty slot terminates the probe sequence: a matching
                // element, if present, would have been placed before it.
                if likely(self.group.match_empty().any_bit_set()) {
                    return None;
                }
                self.probe_seq.move_next(self.bucket_mask);

                // Can't use `RawTableInner::ctrl` here as we don't have
                // an actual `RawTableInner` reference to use.
                let index = self.probe_seq.pos;
                debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH);
                let group_ctrl = self.ctrl.as_ptr().add(index).cast();

                // Load the next group and restart the tag scan on it.
                self.group = Group::load(group_ctrl);
                self.bitmask = self.group.match_tag(self.tag_hash).into_iter();
            }
        }
    }
}
4318
pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
    // Iterator over the candidate buckets.
    pub iter: RawIter<T>,
    // Table the yielded elements are removed from.
    pub table: &'a mut RawTable<T, A>,
}
4323
4324impl<T, A: Allocator> RawExtractIf<'_, T, A> {
4325 #[cfg_attr(feature = "inline-more", inline)]
4326 pub(crate) fn next<F>(&mut self, mut f: F) -> Option<T>
4327 where
4328 F: FnMut(&mut T) -> bool,
4329 {
4330 unsafe {
4331 for item in &mut self.iter {
4332 if f(item.as_mut()) {
4333 return Some(self.table.remove(item).0);
4334 }
4335 }
4336 }
4337 None
4338 }
4339}
4340
4341#[cfg(test)]
4342mod test_map {
4343 use super::*;
4344
    #[test]
    fn test_prev_pow2() {
        // Skip 0, not defined for that input.
        let mut pow2: usize = 1;
        while (pow2 << 1) > 0 {
            let next_pow2 = pow2 << 1;
            // A power of two maps to itself.
            assert_eq!(pow2, prev_pow2(pow2));
            // Need to skip 2, because it's also a power of 2, so it doesn't
            // return the previous power of 2.
            if next_pow2 > 2 {
                // Values strictly between two powers of two map down to `pow2`.
                assert_eq!(pow2, prev_pow2(pow2 + 1));
                assert_eq!(pow2, prev_pow2(next_pow2 - 1));
            }
            pow2 = next_pow2;
        }
    }
4361
    #[test]
    fn test_minimum_capacity_for_small_types() {
        #[track_caller]
        fn test_t<T>() {
            // Even `with_capacity(1)` must provide at least
            // `Group::WIDTH / size_of::<T>()` buckets for small `T`.
            let raw_table: RawTable<T> = RawTable::with_capacity(1);
            let actual_buckets = raw_table.num_buckets();
            let min_buckets = Group::WIDTH / core::mem::size_of::<T>();
            assert!(
                actual_buckets >= min_buckets,
                "expected at least {min_buckets} buckets, got {actual_buckets} buckets"
            );
        }

        test_t::<u8>();

        // This is only "small" for some platforms, like x86_64 with SSE2, but
        // there's no harm in running it on other platforms.
        test_t::<u16>();
    }
4381
    /// Calls `RawTableInner::rehash_in_place` with the type-erased arguments
    /// reconstructed from `T`: a hasher trampoline, the element size, and an
    /// optional drop function (present only when `T` needs dropping).
    fn rehash_in_place<T>(table: &mut RawTable<T>, hasher: impl Fn(&T) -> u64) {
        unsafe {
            table.table.rehash_in_place(
                &|table, index| hasher(table.bucket::<T>(index).as_ref()),
                mem::size_of::<T>(),
                if mem::needs_drop::<T>() {
                    Some(|ptr| ptr::drop_in_place(ptr.cast::<T>()))
                } else {
                    None
                },
            );
        }
    }
4395
4396 #[test]
4397 fn rehash() {
4398 let mut table = RawTable::new();
4399 let hasher = |i: &u64| *i;
4400 for i in 0..100 {
4401 table.insert(i, i, hasher);
4402 }
4403
4404 for i in 0..100 {
4405 unsafe {
4406 assert_eq!(table.find(i, |x| *x == i).map(|b| b.read()), Some(i));
4407 }
4408 assert!(table.find(i + 100, |x| *x == i + 100).is_none());
4409 }
4410
4411 rehash_in_place(&mut table, hasher);
4412
4413 for i in 0..100 {
4414 unsafe {
4415 assert_eq!(table.find(i, |x| *x == i).map(|b| b.read()), Some(i));
4416 }
4417 assert!(table.find(i + 100, |x| *x == i + 100).is_none());
4418 }
4419 }
4420
    /// CHECKING THAT WE ARE NOT TRYING TO READ THE MEMORY OF
    /// AN UNINITIALIZED TABLE DURING THE DROP
    #[test]
    fn test_drop_uninitialized() {
        use stdalloc::vec::Vec;

        // `new_uninitialized` allocates buckets but leaves the control bytes
        // and elements uninitialized; dropping the table must not read them.
        let table = unsafe {
            // SAFETY: The `buckets` is power of two and we're not
            // trying to actually use the returned RawTable.
            RawTable::<(u64, Vec<i32>)>::new_uninitialized(Global, 8, Fallibility::Infallible)
                .unwrap()
        };
        drop(table);
    }
4435
    /// CHECKING THAT WE DON'T TRY TO DROP DATA IF THE `ITEMS`
    /// ARE ZERO, EVEN IF WE HAVE `FULL` CONTROL BYTES.
    #[test]
    fn test_drop_zero_items() {
        use stdalloc::vec::Vec;
        unsafe {
            // SAFETY: The `buckets` is power of two and we're not
            // trying to actually use the returned RawTable.
            let mut table =
                RawTable::<(u64, Vec<i32>)>::new_uninitialized(Global, 8, Fallibility::Infallible)
                    .unwrap();

            // WE SIMULATE, AS IT WERE, A FULL TABLE.

            // SAFETY: We checked that the table is allocated and therefore the table already has
            // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for)
            // so writing `table.table.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe.
            table.table.ctrl_slice().fill_empty();

            // SAFETY: table.capacity() is guaranteed to be smaller than table.num_buckets()
            table.table.ctrl(0).write_bytes(0, table.capacity());

            // Fix up the trailing control bytes. See the comments in set_ctrl
            // for the handling of tables smaller than the group width.
            if table.num_buckets() < Group::WIDTH {
                // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes,
                // so copying `self.num_buckets() == self.bucket_mask + 1` bytes with offset equal to
                // `Group::WIDTH` is safe
                table
                    .table
                    .ctrl(0)
                    .copy_to(table.table.ctrl(Group::WIDTH), table.table.num_buckets());
            } else {
                // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of
                // control bytes, so copying `Group::WIDTH` bytes with offset equal
                // to `self.num_buckets() == self.bucket_mask + 1` is safe
                table
                    .table
                    .ctrl(0)
                    .copy_to(table.table.ctrl(table.table.num_buckets()), Group::WIDTH);
            }
            // The table still reports zero items, so the drop must not touch
            // the (uninitialized) element storage despite the FULL bytes.
            drop(table);
        }
    }
4480
    /// CHECKING THAT A PANIC RAISED WHILE CLONING ELEMENTS INSIDE
    /// `clone_from` IS CAUGHT, THAT EVERY SUCCESSFULLY CLONED ELEMENT IS
    /// DROPPED EXACTLY ONCE, AND THAT THE DESTINATION TABLE IS LEFT EMPTY
    /// AND USABLE AFTERWARDS.
    #[test]
    #[cfg(panic = "unwind")]
    fn test_catch_panic_clone_from() {
        use super::{AllocError, Allocator, Global};
        use core::sync::atomic::{AtomicI8, Ordering};
        use std::thread;
        use stdalloc::sync::Arc;
        use stdalloc::vec::Vec;

        // Counts live allocator clones; decremented on drop.
        struct MyAllocInner {
            drop_count: Arc<AtomicI8>,
        }

        #[derive(Clone)]
        struct MyAlloc {
            _inner: Arc<MyAllocInner>,
        }

        impl Drop for MyAllocInner {
            fn drop(&mut self) {
                println!("MyAlloc freed.");
                self.drop_count.fetch_sub(1, Ordering::SeqCst);
            }
        }

        unsafe impl Allocator for MyAlloc {
            fn allocate(&self, layout: Layout) -> std::result::Result<NonNull<[u8]>, AllocError> {
                let g = Global;
                g.allocate(layout)
            }

            unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
                unsafe {
                    let g = Global;
                    g.deallocate(ptr, layout);
                }
            }
        }

        const DISARMED: bool = false;
        const ARMED: bool = true;

        // Element type that panics in `clone` when armed and detects double
        // drops.
        struct CheckedCloneDrop {
            panic_in_clone: bool,
            dropped: bool,
            need_drop: Vec<u64>,
        }

        impl Clone for CheckedCloneDrop {
            fn clone(&self) -> Self {
                if self.panic_in_clone {
                    panic!("panic in clone")
                }
                Self {
                    panic_in_clone: self.panic_in_clone,
                    dropped: self.dropped,
                    need_drop: self.need_drop.clone(),
                }
            }
        }

        impl Drop for CheckedCloneDrop {
            fn drop(&mut self) {
                if self.dropped {
                    panic!("double drop");
                }
                self.dropped = true;
            }
        }

        // Two allocator clones will be created (one per table).
        let dropped: Arc<AtomicI8> = Arc::new(AtomicI8::new(2));

        let mut table = RawTable::new_in(MyAlloc {
            _inner: Arc::new(MyAllocInner {
                drop_count: dropped.clone(),
            }),
        });

        // Destination table: seven disarmed elements.
        for (idx, panic_in_clone) in core::iter::repeat_n(DISARMED, 7).enumerate() {
            let idx = idx as u64;
            table.insert(
                idx,
                (
                    idx,
                    CheckedCloneDrop {
                        panic_in_clone,
                        dropped: false,
                        need_drop: vec![idx],
                    },
                ),
                |(k, _)| *k,
            );
        }

        assert_eq!(table.len(), 7);

        thread::scope(|s| {
            let result = s.spawn(|| {
                // The third element is armed and will panic mid-`clone_from`.
                let armed_flags = [
                    DISARMED, DISARMED, ARMED, DISARMED, DISARMED, DISARMED, DISARMED,
                ];
                let mut scope_table = RawTable::new_in(MyAlloc {
                    _inner: Arc::new(MyAllocInner {
                        drop_count: dropped.clone(),
                    }),
                });
                for (idx, &panic_in_clone) in armed_flags.iter().enumerate() {
                    let idx = idx as u64;
                    scope_table.insert(
                        idx,
                        (
                            idx,
                            CheckedCloneDrop {
                                panic_in_clone,
                                dropped: false,
                                need_drop: vec![idx + 100],
                            },
                        ),
                        |(k, _)| *k,
                    );
                }
                table.clone_from(&scope_table);
            });
            // The clone panicked; the panic must not have corrupted `table`.
            assert!(result.join().is_err());
        });

        // Let's check that all iterators work fine and do not return elements
        // (especially `RawIterRange`, which does not depend on the number of
        // elements in the table, but looks directly at the control bytes)
        //
        // SAFETY: We know for sure that `RawTable` will outlive
        // the returned `RawIter / RawIterRange` iterator.
        assert_eq!(table.len(), 0);
        assert_eq!(unsafe { table.iter().count() }, 0);
        assert_eq!(unsafe { table.iter().iter.count() }, 0);

        for idx in 0..table.num_buckets() {
            let idx = idx as u64;
            assert!(
                table.find(idx, |(k, _)| *k == idx).is_none(),
                "Index: {idx}"
            );
        }

        // The scoped table's allocator clone was dropped with it; only the
        // allocator of the still-live outer `table` remains (count == 1).
        assert_eq!(dropped.load(Ordering::SeqCst), 1);
    }
4630}