siphasher/
sip.rs

// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10
//! An implementation of SipHash.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18
19use crate::common::{compress, load_int_le, u8to64_le};
20
/// An implementation of SipHash 1-3.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
///
/// "1-3" refers to the round schedule: one compression round per 8-byte
/// message word and three finalization rounds (see `Sip13Rounds`).
///
/// The type is a thin public wrapper around the module-private generic
/// `Hasher`; `Default` yields a hasher whose two keys are both zero.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher13 {
    // Streaming state (keys, running length, buffered tail, v0..v3).
    hasher: Hasher<Sip13Rounds>,
}
29
/// An implementation of SipHash 2-4.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
///
/// "2-4" refers to the round schedule: two compression rounds per 8-byte
/// message word and four finalization rounds (see `Sip24Rounds`).
///
/// The type is a thin public wrapper around the module-private generic
/// `Hasher`; `Default` yields a hasher whose two keys are both zero.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher24 {
    // Streaming state (keys, running length, buffered tail, v0..v3).
    hasher: Hasher<Sip24Rounds>,
}
38
/// An implementation of SipHash 2-4.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
///
/// SipHash is a general-purpose hashing function: it runs at a good
/// speed (competitive with Spooky and City) and permits strong _keyed_
/// hashing. This lets you key your hashtables from a strong RNG, such as
/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
///
/// Although the SipHash algorithm is considered to be generally strong,
/// it is not intended for cryptographic purposes. As such, all
/// cryptographic uses of this implementation are _strongly discouraged_.
///
/// This type is a newtype around [`SipHasher24`]; every operation is
/// forwarded to the wrapped hasher.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher(SipHasher24);
54
/// Streaming SipHash core shared by the public hasher types.
///
/// `S` selects the round schedule through the `Sip` trait. It is carried
/// only at the type level via `PhantomData`, so no value of `S` is stored.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct Hasher<S: Sip> {
    // First and second 64-bit halves of the key; kept so that the state can
    // be re-derived by `reset` and reported by the public `keys`/`key`.
    k0: u64,
    k1: u64,
    length: usize, // how many bytes we've processed
    state: State,  // hash State
    tail: u64,     // unprocessed bytes le
    ntail: usize,  // how many bytes in tail are valid
    // Zero-sized marker tying the hasher to its round schedule `S`.
    _marker: PhantomData<S>,
}
66
/// The four 64-bit words of SipHash internal state.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    //
    // Note the declaration order is therefore v0, v2, v1, v3 — do not
    // "fix" it to numeric order.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
79
80impl SipHasher {
81    /// Creates a new `SipHasher` with the two initial keys set to 0.
82    #[inline]
83    pub fn new() -> SipHasher {
84        SipHasher::new_with_keys(0, 0)
85    }
86
87    /// Creates a `SipHasher` that is keyed off the provided keys.
88    #[inline]
89    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
90        SipHasher(SipHasher24::new_with_keys(key0, key1))
91    }
92
93    /// Creates a `SipHasher` from a 16 byte key.
94    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
95        let mut b0 = [0u8; 8];
96        let mut b1 = [0u8; 8];
97        b0.copy_from_slice(&key[0..8]);
98        b1.copy_from_slice(&key[8..16]);
99        let key0 = u64::from_le_bytes(b0);
100        let key1 = u64::from_le_bytes(b1);
101        Self::new_with_keys(key0, key1)
102    }
103
104    /// Get the keys used by this hasher
105    pub fn keys(&self) -> (u64, u64) {
106        (self.0.hasher.k0, self.0.hasher.k1)
107    }
108
109    /// Get the key used by this hasher as a 16 byte vector
110    pub fn key(&self) -> [u8; 16] {
111        let mut bytes = [0u8; 16];
112        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
113        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
114        bytes
115    }
116
117    /// Hash a byte array - This is the easiest and safest way to use SipHash.
118    #[inline]
119    pub fn hash(&self, bytes: &[u8]) -> u64 {
120        let mut hasher = self.0.hasher;
121        hasher.write(bytes);
122        hasher.finish()
123    }
124}
125
126impl SipHasher13 {
127    /// Creates a new `SipHasher13` with the two initial keys set to 0.
128    #[inline]
129    pub fn new() -> SipHasher13 {
130        SipHasher13::new_with_keys(0, 0)
131    }
132
133    /// Creates a `SipHasher13` that is keyed off the provided keys.
134    #[inline]
135    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
136        SipHasher13 {
137            hasher: Hasher::new_with_keys(key0, key1),
138        }
139    }
140
141    /// Creates a `SipHasher13` from a 16 byte key.
142    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
143        let mut b0 = [0u8; 8];
144        let mut b1 = [0u8; 8];
145        b0.copy_from_slice(&key[0..8]);
146        b1.copy_from_slice(&key[8..16]);
147        let key0 = u64::from_le_bytes(b0);
148        let key1 = u64::from_le_bytes(b1);
149        Self::new_with_keys(key0, key1)
150    }
151
152    /// Get the keys used by this hasher
153    pub fn keys(&self) -> (u64, u64) {
154        (self.hasher.k0, self.hasher.k1)
155    }
156
157    /// Get the key used by this hasher as a 16 byte vector
158    pub fn key(&self) -> [u8; 16] {
159        let mut bytes = [0u8; 16];
160        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
161        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
162        bytes
163    }
164
165    /// Hash a byte array - This is the easiest and safest way to use SipHash.
166    #[inline]
167    pub fn hash(&self, bytes: &[u8]) -> u64 {
168        let mut hasher = self.hasher;
169        hasher.write(bytes);
170        hasher.finish()
171    }
172}
173
174impl SipHasher24 {
175    /// Creates a new `SipHasher24` with the two initial keys set to 0.
176    #[inline]
177    pub fn new() -> SipHasher24 {
178        SipHasher24::new_with_keys(0, 0)
179    }
180
181    /// Creates a `SipHasher24` that is keyed off the provided keys.
182    #[inline]
183    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
184        SipHasher24 {
185            hasher: Hasher::new_with_keys(key0, key1),
186        }
187    }
188
189    /// Creates a `SipHasher24` from a 16 byte key.
190    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
191        let mut b0 = [0u8; 8];
192        let mut b1 = [0u8; 8];
193        b0.copy_from_slice(&key[0..8]);
194        b1.copy_from_slice(&key[8..16]);
195        let key0 = u64::from_le_bytes(b0);
196        let key1 = u64::from_le_bytes(b1);
197        Self::new_with_keys(key0, key1)
198    }
199
200    /// Get the keys used by this hasher
201    pub fn keys(&self) -> (u64, u64) {
202        (self.hasher.k0, self.hasher.k1)
203    }
204
205    /// Get the key used by this hasher as a 16 byte vector
206    pub fn key(&self) -> [u8; 16] {
207        let mut bytes = [0u8; 16];
208        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
209        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
210        bytes
211    }
212
213    /// Hash a byte array - This is the easiest and safest way to use SipHash.
214    #[inline]
215    pub fn hash(&self, bytes: &[u8]) -> u64 {
216        let mut hasher = self.hasher;
217        hasher.write(bytes);
218        hasher.finish()
219    }
220}
221
222impl<S: Sip> Hasher<S> {
223    #[inline]
224    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
225        let mut state = Hasher {
226            k0: key0,
227            k1: key1,
228            length: 0,
229            state: State {
230                v0: 0,
231                v1: 0,
232                v2: 0,
233                v3: 0,
234            },
235            tail: 0,
236            ntail: 0,
237            _marker: PhantomData,
238        };
239        state.reset();
240        state
241    }
242
243    #[inline]
244    fn reset(&mut self) {
245        self.length = 0;
246        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
247        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
248        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
249        self.state.v3 = self.k1 ^ 0x7465646279746573;
250        self.ntail = 0;
251    }
252
253    // A specialized write function for values with size <= 8.
254    //
255    // The hashing of multi-byte integers depends on endianness. E.g.:
256    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
257    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
258    //
259    // This function does the right thing for little-endian hardware. On
260    // big-endian hardware `x` must be byte-swapped first to give the right
261    // behaviour. After any byte-swapping, the input must be zero-extended to
262    // 64-bits. The caller is responsible for the byte-swapping and
263    // zero-extension.
264    #[inline]
265    fn short_write<T>(&mut self, _x: T, x: u64) {
266        let size = mem::size_of::<T>();
267        self.length += size;
268
269        // The original number must be zero-extended, not sign-extended.
270        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
271
272        // The number of bytes needed to fill `self.tail`.
273        let needed = 8 - self.ntail;
274
275        self.tail |= x << (8 * self.ntail);
276        if size < needed {
277            self.ntail += size;
278            return;
279        }
280
281        // `self.tail` is full, process it.
282        self.state.v3 ^= self.tail;
283        S::c_rounds(&mut self.state);
284        self.state.v0 ^= self.tail;
285
286        self.ntail = size - needed;
287        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
288    }
289}
290
291impl hash::Hasher for SipHasher {
292    #[inline]
293    fn write(&mut self, msg: &[u8]) {
294        self.0.write(msg)
295    }
296
297    #[inline]
298    fn finish(&self) -> u64 {
299        self.0.finish()
300    }
301
302    #[inline]
303    fn write_usize(&mut self, i: usize) {
304        self.0.write_usize(i);
305    }
306
307    #[inline]
308    fn write_u8(&mut self, i: u8) {
309        self.0.write_u8(i);
310    }
311
312    #[inline]
313    fn write_u16(&mut self, i: u16) {
314        self.0.write_u16(i);
315    }
316
317    #[inline]
318    fn write_u32(&mut self, i: u32) {
319        self.0.write_u32(i);
320    }
321
322    #[inline]
323    fn write_u64(&mut self, i: u64) {
324        self.0.write_u64(i);
325    }
326}
327
328impl hash::Hasher for SipHasher13 {
329    #[inline]
330    fn write(&mut self, msg: &[u8]) {
331        self.hasher.write(msg)
332    }
333
334    #[inline]
335    fn finish(&self) -> u64 {
336        self.hasher.finish()
337    }
338
339    #[inline]
340    fn write_usize(&mut self, i: usize) {
341        self.hasher.write_usize(i);
342    }
343
344    #[inline]
345    fn write_u8(&mut self, i: u8) {
346        self.hasher.write_u8(i);
347    }
348
349    #[inline]
350    fn write_u16(&mut self, i: u16) {
351        self.hasher.write_u16(i);
352    }
353
354    #[inline]
355    fn write_u32(&mut self, i: u32) {
356        self.hasher.write_u32(i);
357    }
358
359    #[inline]
360    fn write_u64(&mut self, i: u64) {
361        self.hasher.write_u64(i);
362    }
363}
364
365impl hash::Hasher for SipHasher24 {
366    #[inline]
367    fn write(&mut self, msg: &[u8]) {
368        self.hasher.write(msg)
369    }
370
371    #[inline]
372    fn finish(&self) -> u64 {
373        self.hasher.finish()
374    }
375
376    #[inline]
377    fn write_usize(&mut self, i: usize) {
378        self.hasher.write_usize(i);
379    }
380
381    #[inline]
382    fn write_u8(&mut self, i: u8) {
383        self.hasher.write_u8(i);
384    }
385
386    #[inline]
387    fn write_u16(&mut self, i: u16) {
388        self.hasher.write_u16(i);
389    }
390
391    #[inline]
392    fn write_u32(&mut self, i: u32) {
393        self.hasher.write_u32(i);
394    }
395
396    #[inline]
397    fn write_u64(&mut self, i: u64) {
398        self.hasher.write_u64(i);
399    }
400}
401
402impl<S: Sip> hash::Hasher for Hasher<S> {
403    #[inline]
404    fn write_usize(&mut self, i: usize) {
405        self.short_write(i, i.to_le() as u64);
406    }
407
408    #[inline]
409    fn write_u8(&mut self, i: u8) {
410        self.short_write(i, i as u64);
411    }
412
413    #[inline]
414    fn write_u32(&mut self, i: u32) {
415        self.short_write(i, i.to_le() as u64);
416    }
417
418    #[inline]
419    fn write_u64(&mut self, i: u64) {
420        self.short_write(i, i.to_le());
421    }
422
423    #[inline]
424    fn write(&mut self, msg: &[u8]) {
425        let length = msg.len();
426        self.length += length;
427
428        let mut needed = 0;
429
430        if self.ntail != 0 {
431            needed = 8 - self.ntail;
432            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
433            if length < needed {
434                self.ntail += length;
435                return;
436            } else {
437                self.state.v3 ^= self.tail;
438                S::c_rounds(&mut self.state);
439                self.state.v0 ^= self.tail;
440                self.ntail = 0;
441            }
442        }
443
444        // Buffered tail is now flushed, process new input.
445        let len = length - needed;
446        let left = len & 0x7;
447
448        let mut i = needed;
449        while i < len - left {
450            let mi = unsafe { load_int_le!(msg, i, u64) };
451
452            self.state.v3 ^= mi;
453            S::c_rounds(&mut self.state);
454            self.state.v0 ^= mi;
455
456            i += 8;
457        }
458
459        self.tail = unsafe { u8to64_le(msg, i, left) };
460        self.ntail = left;
461    }
462
463    #[inline]
464    fn finish(&self) -> u64 {
465        let mut state = self.state;
466
467        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
468
469        state.v3 ^= b;
470        S::c_rounds(&mut state);
471        state.v0 ^= b;
472
473        state.v2 ^= 0xff;
474        S::d_rounds(&mut state);
475
476        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
477    }
478}
479
480impl<S: Sip> Default for Hasher<S> {
481    /// Creates a `Hasher<S>` with the two initial keys set to 0.
482    #[inline]
483    fn default() -> Hasher<S> {
484        Hasher::new_with_keys(0, 0)
485    }
486}
487
/// Round schedule of a SipHash variant.
///
/// Implementors are zero-sized marker types used as the `S` parameter of
/// `Hasher`.
#[doc(hidden)]
trait Sip {
    /// Compression rounds, applied once for every 8-byte word absorbed and
    /// once more for the final length/tail word.
    fn c_rounds(_: &mut State);
    /// Finalization rounds, applied once at the end of `finish`.
    fn d_rounds(_: &mut State);
}
493
494#[derive(Debug, Clone, Copy, Default)]
495struct Sip13Rounds;
496
497impl Sip for Sip13Rounds {
498    #[inline]
499    fn c_rounds(state: &mut State) {
500        compress!(state);
501    }
502
503    #[inline]
504    fn d_rounds(state: &mut State) {
505        compress!(state);
506        compress!(state);
507        compress!(state);
508    }
509}
510
511#[derive(Debug, Clone, Copy, Default)]
512struct Sip24Rounds;
513
514impl Sip for Sip24Rounds {
515    #[inline]
516    fn c_rounds(state: &mut State) {
517        compress!(state);
518        compress!(state);
519    }
520
521    #[inline]
522    fn d_rounds(state: &mut State) {
523        compress!(state);
524        compress!(state);
525        compress!(state);
526        compress!(state);
527    }
528}