siphasher/
sip128.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash with a 128-bit output.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18
19use crate::common::{compress, load_int_le, u8to64_le};
20
21/// A 128-bit (2x64) hash output
22#[derive(Debug, Clone, Copy, Default)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24pub struct Hash128 {
25    pub h1: u64,
26    pub h2: u64,
27}
28
29impl PartialEq for Hash128 {
30    /// Constant-time equality comparison to prevent timing attacks.
31    fn eq(&self, other: &Self) -> bool {
32        let x = (self.h1 ^ other.h1) | (self.h2 ^ other.h2);
33        unsafe { core::ptr::read_volatile(&x) == 0 }
34    }
35}
36
37impl Eq for Hash128 {}
38
39impl From<u128> for Hash128 {
40    fn from(v: u128) -> Self {
41        Hash128 {
42            h1: v as u64,
43            h2: (v >> 64) as u64,
44        }
45    }
46}
47
48impl From<Hash128> for u128 {
49    fn from(h: Hash128) -> u128 {
50        (h.h1 as u128) | ((h.h2 as u128) << 64)
51    }
52}
53
54/// An implementation of SipHash128 1-3.
55#[derive(Debug, Clone, Copy, Default)]
56#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57pub struct SipHasher13 {
58    hasher: Hasher<Sip13Rounds>,
59}
60
61/// An implementation of SipHash128 2-4.
62#[derive(Debug, Clone, Copy, Default)]
63#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
64pub struct SipHasher24 {
65    hasher: Hasher<Sip24Rounds>,
66}
67
68/// An implementation of SipHash128 2-4.
69///
70/// SipHash is a general-purpose hashing function: it runs at a good
71/// speed (competitive with Spooky and City) and permits strong _keyed_
72/// hashing. This lets you key your hashtables from a strong RNG, such as
73/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
74///
75/// Although the SipHash algorithm is considered to be generally strong,
76/// it is not intended for cryptographic purposes. As such, all
77/// cryptographic uses of this implementation are _strongly discouraged_.
78#[derive(Debug, Clone, Copy, Default)]
79#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
80pub struct SipHasher(SipHasher24);
81
82#[derive(Debug, Copy)]
83#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
84struct Hasher<S: Sip> {
85    k0: u64,
86    k1: u64,
87    length: usize, // how many bytes we've processed
88    state: State,  // hash State
89    tail: u64,     // unprocessed bytes le
90    ntail: usize,  // how many bytes in tail are valid
91    _marker: PhantomData<S>,
92}
93
94#[derive(Debug, Clone, Copy)]
95#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
96struct State {
97    // v0, v2 and v1, v3 show up in pairs in the algorithm,
98    // and simd implementations of SipHash will use vectors
99    // of v02 and v13. By placing them in this order in the struct,
100    // the compiler can pick up on just a few simd optimizations by itself.
101    v0: u64,
102    v2: u64,
103    v1: u64,
104    v3: u64,
105}
106
107pub trait Hasher128 {
108    /// Return a 128-bit hash
109    fn finish128(&self) -> Hash128;
110}
111
112impl SipHasher {
113    /// Creates a new `SipHasher` with the two initial keys set to 0.
114    #[inline]
115    pub fn new() -> SipHasher {
116        SipHasher::new_with_keys(0, 0)
117    }
118
119    /// Creates a `SipHasher` that is keyed off the provided keys.
120    #[inline]
121    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
122        SipHasher(SipHasher24::new_with_keys(key0, key1))
123    }
124
125    /// Creates a `SipHasher` from a 16 byte key.
126    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
127        let mut b0 = [0u8; 8];
128        let mut b1 = [0u8; 8];
129        b0.copy_from_slice(&key[0..8]);
130        b1.copy_from_slice(&key[8..16]);
131        let key0 = u64::from_le_bytes(b0);
132        let key1 = u64::from_le_bytes(b1);
133        Self::new_with_keys(key0, key1)
134    }
135
136    /// Get the keys used by this hasher
137    pub fn keys(&self) -> (u64, u64) {
138        (self.0.hasher.k0, self.0.hasher.k1)
139    }
140
141    /// Get the key used by this hasher as a 16 byte vector
142    pub fn key(&self) -> [u8; 16] {
143        let mut bytes = [0u8; 16];
144        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
145        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
146        bytes
147    }
148
149    /// Hash a byte array - This is the easiest and safest way to use SipHash.
150    #[inline]
151    pub fn hash(&self, bytes: &[u8]) -> Hash128 {
152        let mut hasher = self.0.hasher;
153        hasher.write(bytes);
154        hasher.finish128()
155    }
156}
157
158impl Hasher128 for SipHasher {
159    /// Return a 128-bit hash
160    #[inline]
161    fn finish128(&self) -> Hash128 {
162        self.0.finish128()
163    }
164}
165
166impl SipHasher13 {
167    /// Creates a new `SipHasher13` with the two initial keys set to 0.
168    #[inline]
169    pub fn new() -> SipHasher13 {
170        SipHasher13::new_with_keys(0, 0)
171    }
172
173    /// Creates a `SipHasher13` that is keyed off the provided keys.
174    #[inline]
175    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
176        SipHasher13 {
177            hasher: Hasher::new_with_keys(key0, key1),
178        }
179    }
180
181    /// Creates a `SipHasher13` from a 16 byte key.
182    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
183        let mut b0 = [0u8; 8];
184        let mut b1 = [0u8; 8];
185        b0.copy_from_slice(&key[0..8]);
186        b1.copy_from_slice(&key[8..16]);
187        let key0 = u64::from_le_bytes(b0);
188        let key1 = u64::from_le_bytes(b1);
189        Self::new_with_keys(key0, key1)
190    }
191
192    /// Get the keys used by this hasher
193    pub fn keys(&self) -> (u64, u64) {
194        (self.hasher.k0, self.hasher.k1)
195    }
196
197    /// Get the key used by this hasher as a 16 byte vector
198    pub fn key(&self) -> [u8; 16] {
199        let mut bytes = [0u8; 16];
200        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
201        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
202        bytes
203    }
204
205    /// Hash a byte array - This is the easiest and safest way to use SipHash.
206    #[inline]
207    pub fn hash(&self, bytes: &[u8]) -> Hash128 {
208        let mut hasher = self.hasher;
209        hasher.write(bytes);
210        hasher.finish128()
211    }
212}
213
214impl Hasher128 for SipHasher13 {
215    /// Return a 128-bit hash
216    #[inline]
217    fn finish128(&self) -> Hash128 {
218        self.hasher.finish128()
219    }
220}
221
222impl SipHasher24 {
223    /// Creates a new `SipHasher24` with the two initial keys set to 0.
224    #[inline]
225    pub fn new() -> SipHasher24 {
226        SipHasher24::new_with_keys(0, 0)
227    }
228
229    /// Creates a `SipHasher24` that is keyed off the provided keys.
230    #[inline]
231    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
232        SipHasher24 {
233            hasher: Hasher::new_with_keys(key0, key1),
234        }
235    }
236
237    /// Creates a `SipHasher24` from a 16 byte key.
238    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
239        let mut b0 = [0u8; 8];
240        let mut b1 = [0u8; 8];
241        b0.copy_from_slice(&key[0..8]);
242        b1.copy_from_slice(&key[8..16]);
243        let key0 = u64::from_le_bytes(b0);
244        let key1 = u64::from_le_bytes(b1);
245        Self::new_with_keys(key0, key1)
246    }
247
248    /// Get the keys used by this hasher
249    pub fn keys(&self) -> (u64, u64) {
250        (self.hasher.k0, self.hasher.k1)
251    }
252
253    /// Get the key used by this hasher as a 16 byte vector
254    pub fn key(&self) -> [u8; 16] {
255        let mut bytes = [0u8; 16];
256        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
257        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
258        bytes
259    }
260
261    /// Hash a byte array - This is the easiest and safest way to use SipHash.
262    #[inline]
263    pub fn hash(&self, bytes: &[u8]) -> Hash128 {
264        let mut hasher = self.hasher;
265        hasher.write(bytes);
266        hasher.finish128()
267    }
268}
269
270impl Hasher128 for SipHasher24 {
271    /// Return a 128-bit hash
272    #[inline]
273    fn finish128(&self) -> Hash128 {
274        self.hasher.finish128()
275    }
276}
277
278impl<S: Sip> Hasher<S> {
279    #[inline]
280    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
281        let mut state = Hasher {
282            k0: key0,
283            k1: key1,
284            length: 0,
285            state: State {
286                v0: 0,
287                v1: 0xee,
288                v2: 0,
289                v3: 0,
290            },
291            tail: 0,
292            ntail: 0,
293            _marker: PhantomData,
294        };
295        state.reset();
296        state
297    }
298
299    #[inline]
300    fn reset(&mut self) {
301        self.length = 0;
302        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
303        self.state.v1 = self.k1 ^ 0x646f72616e646f83;
304        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
305        self.state.v3 = self.k1 ^ 0x7465646279746573;
306        self.ntail = 0;
307    }
308
309    // A specialized write function for values with size <= 8.
310    //
311    // The hashing of multi-byte integers depends on endianness. E.g.:
312    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
313    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
314    //
315    // This function does the right thing for little-endian hardware. On
316    // big-endian hardware `x` must be byte-swapped first to give the right
317    // behaviour. After any byte-swapping, the input must be zero-extended to
318    // 64-bits. The caller is responsible for the byte-swapping and
319    // zero-extension.
320    #[inline]
321    fn short_write<T>(&mut self, _x: T, x: u64) {
322        let size = mem::size_of::<T>();
323        self.length += size;
324
325        // The original number must be zero-extended, not sign-extended.
326        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
327
328        // The number of bytes needed to fill `self.tail`.
329        let needed = 8 - self.ntail;
330
331        self.tail |= x << (8 * self.ntail);
332        if size < needed {
333            self.ntail += size;
334            return;
335        }
336
337        // `self.tail` is full, process it.
338        self.state.v3 ^= self.tail;
339        S::c_rounds(&mut self.state);
340        self.state.v0 ^= self.tail;
341
342        self.ntail = size - needed;
343        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
344    }
345}
346
347impl<S: Sip> Hasher<S> {
348    #[inline]
349    pub fn finish128(&self) -> Hash128 {
350        let mut state = self.state;
351
352        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
353
354        state.v3 ^= b;
355        S::c_rounds(&mut state);
356        state.v0 ^= b;
357
358        state.v2 ^= 0xee;
359        S::d_rounds(&mut state);
360        let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
361
362        state.v1 ^= 0xdd;
363        S::d_rounds(&mut state);
364        let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
365
366        Hash128 { h1, h2 }
367    }
368}
369
370impl hash::Hasher for SipHasher {
371    #[inline]
372    fn write(&mut self, msg: &[u8]) {
373        self.0.write(msg)
374    }
375
376    #[inline]
377    fn finish(&self) -> u64 {
378        self.0.finish()
379    }
380
381    #[inline]
382    fn write_usize(&mut self, i: usize) {
383        self.0.write_usize(i);
384    }
385
386    #[inline]
387    fn write_u8(&mut self, i: u8) {
388        self.0.write_u8(i);
389    }
390
391    #[inline]
392    fn write_u16(&mut self, i: u16) {
393        self.0.write_u16(i);
394    }
395
396    #[inline]
397    fn write_u32(&mut self, i: u32) {
398        self.0.write_u32(i);
399    }
400
401    #[inline]
402    fn write_u64(&mut self, i: u64) {
403        self.0.write_u64(i);
404    }
405}
406
407impl hash::Hasher for SipHasher13 {
408    #[inline]
409    fn write(&mut self, msg: &[u8]) {
410        self.hasher.write(msg)
411    }
412
413    #[inline]
414    fn finish(&self) -> u64 {
415        self.hasher.finish()
416    }
417
418    #[inline]
419    fn write_usize(&mut self, i: usize) {
420        self.hasher.write_usize(i);
421    }
422
423    #[inline]
424    fn write_u8(&mut self, i: u8) {
425        self.hasher.write_u8(i);
426    }
427
428    #[inline]
429    fn write_u16(&mut self, i: u16) {
430        self.hasher.write_u16(i);
431    }
432
433    #[inline]
434    fn write_u32(&mut self, i: u32) {
435        self.hasher.write_u32(i);
436    }
437
438    #[inline]
439    fn write_u64(&mut self, i: u64) {
440        self.hasher.write_u64(i);
441    }
442}
443
444impl hash::Hasher for SipHasher24 {
445    #[inline]
446    fn write(&mut self, msg: &[u8]) {
447        self.hasher.write(msg)
448    }
449
450    #[inline]
451    fn finish(&self) -> u64 {
452        self.hasher.finish()
453    }
454
455    #[inline]
456    fn write_usize(&mut self, i: usize) {
457        self.hasher.write_usize(i);
458    }
459
460    #[inline]
461    fn write_u8(&mut self, i: u8) {
462        self.hasher.write_u8(i);
463    }
464
465    #[inline]
466    fn write_u16(&mut self, i: u16) {
467        self.hasher.write_u16(i);
468    }
469
470    #[inline]
471    fn write_u32(&mut self, i: u32) {
472        self.hasher.write_u32(i);
473    }
474
475    #[inline]
476    fn write_u64(&mut self, i: u64) {
477        self.hasher.write_u64(i);
478    }
479}
480
481impl<S: Sip> hash::Hasher for Hasher<S> {
482    #[inline]
483    fn write_usize(&mut self, i: usize) {
484        self.short_write(i, i.to_le() as u64);
485    }
486
487    #[inline]
488    fn write_u8(&mut self, i: u8) {
489        self.short_write(i, i as u64);
490    }
491
492    #[inline]
493    fn write_u32(&mut self, i: u32) {
494        self.short_write(i, i.to_le() as u64);
495    }
496
497    #[inline]
498    fn write_u64(&mut self, i: u64) {
499        self.short_write(i, i.to_le());
500    }
501
502    #[inline]
503    fn write(&mut self, msg: &[u8]) {
504        let length = msg.len();
505        self.length += length;
506
507        let mut needed = 0;
508
509        if self.ntail != 0 {
510            needed = 8 - self.ntail;
511            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
512            if length < needed {
513                self.ntail += length;
514                return;
515            } else {
516                self.state.v3 ^= self.tail;
517                S::c_rounds(&mut self.state);
518                self.state.v0 ^= self.tail;
519                self.ntail = 0;
520            }
521        }
522
523        // Buffered tail is now flushed, process new input.
524        let len = length - needed;
525        let left = len & 0x7;
526
527        let mut i = needed;
528        while i < len - left {
529            let mi = unsafe { load_int_le!(msg, i, u64) };
530
531            self.state.v3 ^= mi;
532            S::c_rounds(&mut self.state);
533            self.state.v0 ^= mi;
534
535            i += 8;
536        }
537
538        self.tail = unsafe { u8to64_le(msg, i, left) };
539        self.ntail = left;
540    }
541
542    #[inline]
543    fn finish(&self) -> u64 {
544        self.finish128().h2
545    }
546}
547
548impl<S: Sip> Clone for Hasher<S> {
549    #[inline]
550    fn clone(&self) -> Hasher<S> {
551        Hasher {
552            k0: self.k0,
553            k1: self.k1,
554            length: self.length,
555            state: self.state,
556            tail: self.tail,
557            ntail: self.ntail,
558            _marker: self._marker,
559        }
560    }
561}
562
563impl<S: Sip> Default for Hasher<S> {
564    /// Creates a `Hasher<S>` with the two initial keys set to 0.
565    #[inline]
566    fn default() -> Hasher<S> {
567        Hasher::new_with_keys(0, 0)
568    }
569}
570
571#[doc(hidden)]
572trait Sip {
573    fn c_rounds(_: &mut State);
574    fn d_rounds(_: &mut State);
575}
576
577#[derive(Debug, Clone, Copy, Default)]
578struct Sip13Rounds;
579
580impl Sip for Sip13Rounds {
581    #[inline]
582    fn c_rounds(state: &mut State) {
583        compress!(state);
584    }
585
586    #[inline]
587    fn d_rounds(state: &mut State) {
588        compress!(state);
589        compress!(state);
590        compress!(state);
591    }
592}
593
594#[derive(Debug, Clone, Copy, Default)]
595struct Sip24Rounds;
596
597impl Sip for Sip24Rounds {
598    #[inline]
599    fn c_rounds(state: &mut State) {
600        compress!(state);
601        compress!(state);
602    }
603
604    #[inline]
605    fn d_rounds(state: &mut State) {
606        compress!(state);
607        compress!(state);
608        compress!(state);
609        compress!(state);
610    }
611}
612
613impl Hash128 {
614    /// Convert into a 16-bytes vector
615    pub fn as_bytes(&self) -> [u8; 16] {
616        let mut bytes = [0u8; 16];
617        bytes[0..8].copy_from_slice(&self.h1.to_le_bytes());
618        bytes[8..16].copy_from_slice(&self.h2.to_le_bytes());
619        bytes
620    }
621
622    /// Convert into a `u128`
623    #[inline]
624    pub fn as_u128(&self) -> u128 {
625        let h1 = self.h1.to_le();
626        let h2 = self.h2.to_le();
627        h1 as u128 | ((h2 as u128) << 64)
628    }
629
630    /// Convert into `(u64, u64)`
631    #[inline]
632    pub fn as_u64(&self) -> (u64, u64) {
633        let h1 = self.h1.to_le();
634        let h2 = self.h2.to_le();
635        (h1, h2)
636    }
637}