Skip to main content

siphasher/
sip128.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash with a 128-bit output.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18
19use crate::common::{compress, load_int_le, u8to64_le};
20
21/// A 128-bit (2x64) hash output
22#[derive(Debug, Clone, Copy, Default)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24pub struct Hash128 {
25    pub h1: u64,
26    pub h2: u64,
27}
28
29impl PartialEq for Hash128 {
30    /// Constant-time equality comparison to prevent timing attacks.
31    fn eq(&self, other: &Self) -> bool {
32        let x = (self.h1 ^ other.h1) | (self.h2 ^ other.h2);
33        unsafe { core::ptr::read_volatile(&x) == 0 }
34    }
35}
36
37impl Eq for Hash128 {}
38
39impl From<u128> for Hash128 {
40    fn from(v: u128) -> Self {
41        Hash128 {
42            h1: v as u64,
43            h2: (v >> 64) as u64,
44        }
45    }
46}
47
48impl From<Hash128> for u128 {
49    fn from(h: Hash128) -> u128 {
50        (h.h1 as u128) | ((h.h2 as u128) << 64)
51    }
52}
53
54/// An implementation of SipHash128 1-3.
55#[derive(Debug, Clone, Copy, Default)]
56#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57pub struct SipHasher13 {
58    hasher: Hasher<Sip13Rounds>,
59}
60
61/// An implementation of SipHash128 2-4.
62#[derive(Debug, Clone, Copy, Default)]
63#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
64pub struct SipHasher24 {
65    hasher: Hasher<Sip24Rounds>,
66}
67
68/// An implementation of SipHash128 2-4.
69///
70/// SipHash is a general-purpose hashing function: it runs at a good
71/// speed (competitive with Spooky and City) and permits strong _keyed_
72/// hashing. This lets you key your hashtables from a strong RNG, such as
73/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
74///
75/// Although the SipHash algorithm is considered to be generally strong,
76/// it is not intended for cryptographic purposes. As such, all
77/// cryptographic uses of this implementation are _strongly discouraged_.
78#[derive(Debug, Clone, Copy, Default)]
79#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
80pub struct SipHasher(SipHasher24);
81
82#[derive(Debug, Copy)]
83#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
84struct Hasher<S: Sip> {
85    k0: u64,
86    k1: u64,
87    length: usize, // how many bytes we've processed
88    state: State,  // hash State
89    tail: u64,     // unprocessed bytes le
90    ntail: usize,  // how many bytes in tail are valid
91    _marker: PhantomData<S>,
92}
93
94#[derive(Debug, Clone, Copy)]
95#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
96struct State {
97    // v0, v2 and v1, v3 show up in pairs in the algorithm,
98    // and simd implementations of SipHash will use vectors
99    // of v02 and v13. By placing them in this order in the struct,
100    // the compiler can pick up on just a few simd optimizations by itself.
101    v0: u64,
102    v2: u64,
103    v1: u64,
104    v3: u64,
105}
106
107pub trait Hasher128 {
108    /// Return a 128-bit hash
109    fn finish128(&self) -> Hash128;
110}
111
112impl SipHasher {
113    /// Creates a new `SipHasher` with the two initial keys set to 0.
114    #[inline]
115    pub fn new() -> SipHasher {
116        SipHasher::new_with_keys(0, 0)
117    }
118
119    /// Creates a `SipHasher` that is keyed off the provided keys.
120    #[inline]
121    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
122        SipHasher(SipHasher24::new_with_keys(key0, key1))
123    }
124
125    /// Creates a `SipHasher` from a 16 byte key.
126    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
127        let mut b0 = [0u8; 8];
128        let mut b1 = [0u8; 8];
129        b0.copy_from_slice(&key[0..8]);
130        b1.copy_from_slice(&key[8..16]);
131        let key0 = u64::from_le_bytes(b0);
132        let key1 = u64::from_le_bytes(b1);
133        Self::new_with_keys(key0, key1)
134    }
135
136    /// Get the keys used by this hasher
137    pub fn keys(&self) -> (u64, u64) {
138        (self.0.hasher.k0, self.0.hasher.k1)
139    }
140
141    /// Get the key used by this hasher as a 16 byte vector
142    pub fn key(&self) -> [u8; 16] {
143        let mut bytes = [0u8; 16];
144        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
145        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
146        bytes
147    }
148
149    /// Hash a byte array - This is the easiest and safest way to use SipHash.
150    #[inline]
151    pub fn hash(&self, bytes: &[u8]) -> Hash128 {
152        self.0.hasher.hash128(bytes)
153    }
154}
155
156impl Hasher128 for SipHasher {
157    /// Return a 128-bit hash
158    #[inline]
159    fn finish128(&self) -> Hash128 {
160        self.0.finish128()
161    }
162}
163
164impl SipHasher13 {
165    /// Creates a new `SipHasher13` with the two initial keys set to 0.
166    #[inline]
167    pub fn new() -> SipHasher13 {
168        SipHasher13::new_with_keys(0, 0)
169    }
170
171    /// Creates a `SipHasher13` that is keyed off the provided keys.
172    #[inline]
173    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
174        SipHasher13 {
175            hasher: Hasher::new_with_keys(key0, key1),
176        }
177    }
178
179    /// Creates a `SipHasher13` from a 16 byte key.
180    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
181        let mut b0 = [0u8; 8];
182        let mut b1 = [0u8; 8];
183        b0.copy_from_slice(&key[0..8]);
184        b1.copy_from_slice(&key[8..16]);
185        let key0 = u64::from_le_bytes(b0);
186        let key1 = u64::from_le_bytes(b1);
187        Self::new_with_keys(key0, key1)
188    }
189
190    /// Get the keys used by this hasher
191    pub fn keys(&self) -> (u64, u64) {
192        (self.hasher.k0, self.hasher.k1)
193    }
194
195    /// Get the key used by this hasher as a 16 byte vector
196    pub fn key(&self) -> [u8; 16] {
197        let mut bytes = [0u8; 16];
198        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
199        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
200        bytes
201    }
202
203    /// Hash a byte array - This is the easiest and safest way to use SipHash.
204    #[inline]
205    pub fn hash(&self, bytes: &[u8]) -> Hash128 {
206        self.hasher.hash128(bytes)
207    }
208}
209
210impl Hasher128 for SipHasher13 {
211    /// Return a 128-bit hash
212    #[inline]
213    fn finish128(&self) -> Hash128 {
214        self.hasher.finish128()
215    }
216}
217
218impl SipHasher24 {
219    /// Creates a new `SipHasher24` with the two initial keys set to 0.
220    #[inline]
221    pub fn new() -> SipHasher24 {
222        SipHasher24::new_with_keys(0, 0)
223    }
224
225    /// Creates a `SipHasher24` that is keyed off the provided keys.
226    #[inline]
227    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
228        SipHasher24 {
229            hasher: Hasher::new_with_keys(key0, key1),
230        }
231    }
232
233    /// Creates a `SipHasher24` from a 16 byte key.
234    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
235        let mut b0 = [0u8; 8];
236        let mut b1 = [0u8; 8];
237        b0.copy_from_slice(&key[0..8]);
238        b1.copy_from_slice(&key[8..16]);
239        let key0 = u64::from_le_bytes(b0);
240        let key1 = u64::from_le_bytes(b1);
241        Self::new_with_keys(key0, key1)
242    }
243
244    /// Get the keys used by this hasher
245    pub fn keys(&self) -> (u64, u64) {
246        (self.hasher.k0, self.hasher.k1)
247    }
248
249    /// Get the key used by this hasher as a 16 byte vector
250    pub fn key(&self) -> [u8; 16] {
251        let mut bytes = [0u8; 16];
252        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
253        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
254        bytes
255    }
256
257    /// Hash a byte array - This is the easiest and safest way to use SipHash.
258    #[inline]
259    pub fn hash(&self, bytes: &[u8]) -> Hash128 {
260        self.hasher.hash128(bytes)
261    }
262}
263
264impl Hasher128 for SipHasher24 {
265    /// Return a 128-bit hash
266    #[inline]
267    fn finish128(&self) -> Hash128 {
268        self.hasher.finish128()
269    }
270}
271
272impl<S: Sip> Hasher<S> {
273    #[inline]
274    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
275        let mut state = Hasher {
276            k0: key0,
277            k1: key1,
278            length: 0,
279            state: State {
280                v0: 0,
281                v1: 0xee,
282                v2: 0,
283                v3: 0,
284            },
285            tail: 0,
286            ntail: 0,
287            _marker: PhantomData,
288        };
289        state.reset();
290        state
291    }
292
293    #[inline]
294    fn reset(&mut self) {
295        self.length = 0;
296        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
297        self.state.v1 = self.k1 ^ 0x646f72616e646f83;
298        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
299        self.state.v3 = self.k1 ^ 0x7465646279746573;
300        self.ntail = 0;
301    }
302
303    // A specialized write function for values with size <= 8.
304    //
305    // The hashing of multi-byte integers depends on endianness. E.g.:
306    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
307    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
308    //
309    // This function does the right thing for little-endian hardware. On
310    // big-endian hardware `x` must be byte-swapped first to give the right
311    // behaviour. After any byte-swapping, the input must be zero-extended to
312    // 64-bits. The caller is responsible for the byte-swapping and
313    // zero-extension.
314    #[inline]
315    fn short_write<T>(&mut self, _x: T, x: u64) {
316        let size = mem::size_of::<T>();
317        self.length += size;
318
319        // The original number must be zero-extended, not sign-extended.
320        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
321
322        // The number of bytes needed to fill `self.tail`.
323        let needed = 8 - self.ntail;
324
325        self.tail |= x << (8 * self.ntail);
326        if size < needed {
327            self.ntail += size;
328            return;
329        }
330
331        // `self.tail` is full, process it.
332        self.state.v3 ^= self.tail;
333        S::c_rounds(&mut self.state);
334        self.state.v0 ^= self.tail;
335
336        self.ntail = size - needed;
337        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
338    }
339
340    #[inline]
341    fn hash128(&self, msg: &[u8]) -> Hash128 {
342        if self.ntail != 0 {
343            let mut hasher = self.clone();
344            hasher.write(msg);
345            return hasher.finish128();
346        }
347
348        let length = self.length + msg.len();
349        let len = msg.len();
350        let left = len & 0x7;
351        let mut state = self.state;
352        let mut i = 0;
353
354        while i < len - left {
355            let mi = unsafe { load_int_le!(msg, i, u64) };
356
357            state.v3 ^= mi;
358            S::c_rounds(&mut state);
359            state.v0 ^= mi;
360
361            i += 8;
362        }
363
364        let tail = unsafe { u8to64_le(msg, i, left) };
365        Self::finish128_with_state(state, length, tail)
366    }
367
368    #[inline]
369    fn finish128_with_state(mut state: State, length: usize, tail: u64) -> Hash128 {
370        let b: u64 = ((length as u64 & 0xff) << 56) | tail;
371
372        state.v3 ^= b;
373        S::c_rounds(&mut state);
374        state.v0 ^= b;
375
376        state.v2 ^= 0xee;
377        S::d_rounds(&mut state);
378        let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
379
380        state.v1 ^= 0xdd;
381        S::d_rounds(&mut state);
382        let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
383
384        Hash128 { h1, h2 }
385    }
386}
387
388impl<S: Sip> Hasher<S> {
389    #[inline]
390    pub fn finish128(&self) -> Hash128 {
391        Self::finish128_with_state(self.state, self.length, self.tail)
392    }
393}
394
395impl hash::Hasher for SipHasher {
396    #[inline]
397    fn write(&mut self, msg: &[u8]) {
398        self.0.write(msg)
399    }
400
401    #[inline]
402    fn finish(&self) -> u64 {
403        self.0.finish()
404    }
405
406    #[inline]
407    fn write_usize(&mut self, i: usize) {
408        self.0.write_usize(i);
409    }
410
411    #[inline]
412    fn write_u8(&mut self, i: u8) {
413        self.0.write_u8(i);
414    }
415
416    #[inline]
417    fn write_u16(&mut self, i: u16) {
418        self.0.write_u16(i);
419    }
420
421    #[inline]
422    fn write_u32(&mut self, i: u32) {
423        self.0.write_u32(i);
424    }
425
426    #[inline]
427    fn write_u64(&mut self, i: u64) {
428        self.0.write_u64(i);
429    }
430}
431
432impl hash::Hasher for SipHasher13 {
433    #[inline]
434    fn write(&mut self, msg: &[u8]) {
435        self.hasher.write(msg)
436    }
437
438    #[inline]
439    fn finish(&self) -> u64 {
440        self.hasher.finish()
441    }
442
443    #[inline]
444    fn write_usize(&mut self, i: usize) {
445        self.hasher.write_usize(i);
446    }
447
448    #[inline]
449    fn write_u8(&mut self, i: u8) {
450        self.hasher.write_u8(i);
451    }
452
453    #[inline]
454    fn write_u16(&mut self, i: u16) {
455        self.hasher.write_u16(i);
456    }
457
458    #[inline]
459    fn write_u32(&mut self, i: u32) {
460        self.hasher.write_u32(i);
461    }
462
463    #[inline]
464    fn write_u64(&mut self, i: u64) {
465        self.hasher.write_u64(i);
466    }
467}
468
469impl hash::Hasher for SipHasher24 {
470    #[inline]
471    fn write(&mut self, msg: &[u8]) {
472        self.hasher.write(msg)
473    }
474
475    #[inline]
476    fn finish(&self) -> u64 {
477        self.hasher.finish()
478    }
479
480    #[inline]
481    fn write_usize(&mut self, i: usize) {
482        self.hasher.write_usize(i);
483    }
484
485    #[inline]
486    fn write_u8(&mut self, i: u8) {
487        self.hasher.write_u8(i);
488    }
489
490    #[inline]
491    fn write_u16(&mut self, i: u16) {
492        self.hasher.write_u16(i);
493    }
494
495    #[inline]
496    fn write_u32(&mut self, i: u32) {
497        self.hasher.write_u32(i);
498    }
499
500    #[inline]
501    fn write_u64(&mut self, i: u64) {
502        self.hasher.write_u64(i);
503    }
504}
505
506impl<S: Sip> hash::Hasher for Hasher<S> {
507    #[inline]
508    fn write_usize(&mut self, i: usize) {
509        self.short_write(i, i.to_le() as u64);
510    }
511
512    #[inline]
513    fn write_u8(&mut self, i: u8) {
514        self.short_write(i, i as u64);
515    }
516
517    #[inline]
518    fn write_u16(&mut self, i: u16) {
519        self.short_write(i, i.to_le() as u64);
520    }
521
522    #[inline]
523    fn write_u32(&mut self, i: u32) {
524        self.short_write(i, i.to_le() as u64);
525    }
526
527    #[inline]
528    fn write_u64(&mut self, i: u64) {
529        self.short_write(i, i.to_le());
530    }
531
532    #[inline]
533    fn write(&mut self, msg: &[u8]) {
534        let length = msg.len();
535        self.length += length;
536
537        let mut needed = 0;
538
539        if self.ntail != 0 {
540            needed = 8 - self.ntail;
541            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
542            if length < needed {
543                self.ntail += length;
544                return;
545            } else {
546                self.state.v3 ^= self.tail;
547                S::c_rounds(&mut self.state);
548                self.state.v0 ^= self.tail;
549                self.ntail = 0;
550            }
551        }
552
553        // Buffered tail is now flushed, process new input.
554        let len = length - needed;
555        let left = len & 0x7;
556
557        let mut i = needed;
558        while i < len - left {
559            let mi = unsafe { load_int_le!(msg, i, u64) };
560
561            self.state.v3 ^= mi;
562            S::c_rounds(&mut self.state);
563            self.state.v0 ^= mi;
564
565            i += 8;
566        }
567
568        self.tail = unsafe { u8to64_le(msg, i, left) };
569        self.ntail = left;
570    }
571
572    #[inline]
573    fn finish(&self) -> u64 {
574        self.finish128().h2
575    }
576}
577
578impl<S: Sip> Clone for Hasher<S> {
579    #[inline]
580    fn clone(&self) -> Hasher<S> {
581        Hasher {
582            k0: self.k0,
583            k1: self.k1,
584            length: self.length,
585            state: self.state,
586            tail: self.tail,
587            ntail: self.ntail,
588            _marker: self._marker,
589        }
590    }
591}
592
593impl<S: Sip> Default for Hasher<S> {
594    /// Creates a `Hasher<S>` with the two initial keys set to 0.
595    #[inline]
596    fn default() -> Hasher<S> {
597        Hasher::new_with_keys(0, 0)
598    }
599}
600
601#[doc(hidden)]
602trait Sip {
603    fn c_rounds(_: &mut State);
604    fn d_rounds(_: &mut State);
605}
606
607#[derive(Debug, Clone, Copy, Default)]
608struct Sip13Rounds;
609
610impl Sip for Sip13Rounds {
611    #[inline]
612    fn c_rounds(state: &mut State) {
613        compress!(state);
614    }
615
616    #[inline]
617    fn d_rounds(state: &mut State) {
618        compress!(state);
619        compress!(state);
620        compress!(state);
621    }
622}
623
624#[derive(Debug, Clone, Copy, Default)]
625struct Sip24Rounds;
626
627impl Sip for Sip24Rounds {
628    #[inline]
629    fn c_rounds(state: &mut State) {
630        compress!(state);
631        compress!(state);
632    }
633
634    #[inline]
635    fn d_rounds(state: &mut State) {
636        compress!(state);
637        compress!(state);
638        compress!(state);
639        compress!(state);
640    }
641}
642
643impl Hash128 {
644    /// Convert into a 16-bytes vector
645    pub fn as_bytes(&self) -> [u8; 16] {
646        let mut bytes = [0u8; 16];
647        bytes[0..8].copy_from_slice(&self.h1.to_le_bytes());
648        bytes[8..16].copy_from_slice(&self.h2.to_le_bytes());
649        bytes
650    }
651
652    /// Convert into a `u128`
653    #[inline]
654    pub fn as_u128(&self) -> u128 {
655        let h1 = self.h1.to_le();
656        let h2 = self.h2.to_le();
657        h1 as u128 | ((h2 as u128) << 64)
658    }
659
660    /// Convert into `(u64, u64)`
661    #[inline]
662    pub fn as_u64(&self) -> (u64, u64) {
663        let h1 = self.h1.to_le();
664        let h2 = self.h2.to_le();
665        (h1, h2)
666    }
667}