Skip to main content

siphasher/
sip.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18
19use crate::common::{compress, load_int_le, u8to64_le};
20
21/// An implementation of SipHash 1-3.
22///
23/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
24#[derive(Debug, Clone, Copy, Default)]
25#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
26pub struct SipHasher13 {
27    hasher: Hasher<Sip13Rounds>,
28}
29
30/// An implementation of SipHash 2-4.
31///
32/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
33#[derive(Debug, Clone, Copy, Default)]
34#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
35pub struct SipHasher24 {
36    hasher: Hasher<Sip24Rounds>,
37}
38
39/// An implementation of SipHash 2-4.
40///
41/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
42///
43/// SipHash is a general-purpose hashing function: it runs at a good
44/// speed (competitive with Spooky and City) and permits strong _keyed_
45/// hashing. This lets you key your hashtables from a strong RNG, such as
46/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
47///
48/// Although the SipHash algorithm is considered to be generally strong,
49/// it is not intended for cryptographic purposes. As such, all
50/// cryptographic uses of this implementation are _strongly discouraged_.
51#[derive(Debug, Clone, Copy, Default)]
52#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
53pub struct SipHasher(SipHasher24);
54
55#[derive(Debug, Clone, Copy)]
56#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57struct Hasher<S: Sip> {
58    k0: u64,
59    k1: u64,
60    length: usize, // how many bytes we've processed
61    state: State,  // hash State
62    tail: u64,     // unprocessed bytes le
63    ntail: usize,  // how many bytes in tail are valid
64    _marker: PhantomData<S>,
65}
66
67#[derive(Debug, Clone, Copy)]
68#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
69struct State {
70    // v0, v2 and v1, v3 show up in pairs in the algorithm,
71    // and simd implementations of SipHash will use vectors
72    // of v02 and v13. By placing them in this order in the struct,
73    // the compiler can pick up on just a few simd optimizations by itself.
74    v0: u64,
75    v2: u64,
76    v1: u64,
77    v3: u64,
78}
79
80impl SipHasher {
81    /// Creates a new `SipHasher` with the two initial keys set to 0.
82    #[inline]
83    pub fn new() -> SipHasher {
84        SipHasher::new_with_keys(0, 0)
85    }
86
87    /// Creates a `SipHasher` that is keyed off the provided keys.
88    #[inline]
89    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
90        SipHasher(SipHasher24::new_with_keys(key0, key1))
91    }
92
93    /// Creates a `SipHasher` from a 16 byte key.
94    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
95        let mut b0 = [0u8; 8];
96        let mut b1 = [0u8; 8];
97        b0.copy_from_slice(&key[0..8]);
98        b1.copy_from_slice(&key[8..16]);
99        let key0 = u64::from_le_bytes(b0);
100        let key1 = u64::from_le_bytes(b1);
101        Self::new_with_keys(key0, key1)
102    }
103
104    /// Get the keys used by this hasher
105    pub fn keys(&self) -> (u64, u64) {
106        (self.0.hasher.k0, self.0.hasher.k1)
107    }
108
109    /// Get the key used by this hasher as a 16 byte vector
110    pub fn key(&self) -> [u8; 16] {
111        let mut bytes = [0u8; 16];
112        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
113        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
114        bytes
115    }
116
117    /// Hash a byte array - This is the easiest and safest way to use SipHash.
118    #[inline]
119    pub fn hash(&self, bytes: &[u8]) -> u64 {
120        self.0.hasher.hash(bytes)
121    }
122}
123
124impl SipHasher13 {
125    /// Creates a new `SipHasher13` with the two initial keys set to 0.
126    #[inline]
127    pub fn new() -> SipHasher13 {
128        SipHasher13::new_with_keys(0, 0)
129    }
130
131    /// Creates a `SipHasher13` that is keyed off the provided keys.
132    #[inline]
133    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
134        SipHasher13 {
135            hasher: Hasher::new_with_keys(key0, key1),
136        }
137    }
138
139    /// Creates a `SipHasher13` from a 16 byte key.
140    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
141        let mut b0 = [0u8; 8];
142        let mut b1 = [0u8; 8];
143        b0.copy_from_slice(&key[0..8]);
144        b1.copy_from_slice(&key[8..16]);
145        let key0 = u64::from_le_bytes(b0);
146        let key1 = u64::from_le_bytes(b1);
147        Self::new_with_keys(key0, key1)
148    }
149
150    /// Get the keys used by this hasher
151    pub fn keys(&self) -> (u64, u64) {
152        (self.hasher.k0, self.hasher.k1)
153    }
154
155    /// Get the key used by this hasher as a 16 byte vector
156    pub fn key(&self) -> [u8; 16] {
157        let mut bytes = [0u8; 16];
158        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
159        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
160        bytes
161    }
162
163    /// Hash a byte array - This is the easiest and safest way to use SipHash.
164    #[inline]
165    pub fn hash(&self, bytes: &[u8]) -> u64 {
166        self.hasher.hash(bytes)
167    }
168}
169
170impl SipHasher24 {
171    /// Creates a new `SipHasher24` with the two initial keys set to 0.
172    #[inline]
173    pub fn new() -> SipHasher24 {
174        SipHasher24::new_with_keys(0, 0)
175    }
176
177    /// Creates a `SipHasher24` that is keyed off the provided keys.
178    #[inline]
179    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
180        SipHasher24 {
181            hasher: Hasher::new_with_keys(key0, key1),
182        }
183    }
184
185    /// Creates a `SipHasher24` from a 16 byte key.
186    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
187        let mut b0 = [0u8; 8];
188        let mut b1 = [0u8; 8];
189        b0.copy_from_slice(&key[0..8]);
190        b1.copy_from_slice(&key[8..16]);
191        let key0 = u64::from_le_bytes(b0);
192        let key1 = u64::from_le_bytes(b1);
193        Self::new_with_keys(key0, key1)
194    }
195
196    /// Get the keys used by this hasher
197    pub fn keys(&self) -> (u64, u64) {
198        (self.hasher.k0, self.hasher.k1)
199    }
200
201    /// Get the key used by this hasher as a 16 byte vector
202    pub fn key(&self) -> [u8; 16] {
203        let mut bytes = [0u8; 16];
204        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
205        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
206        bytes
207    }
208
209    /// Hash a byte array - This is the easiest and safest way to use SipHash.
210    #[inline]
211    pub fn hash(&self, bytes: &[u8]) -> u64 {
212        self.hasher.hash(bytes)
213    }
214}
215
216impl<S: Sip> Hasher<S> {
217    #[inline]
218    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
219        let mut state = Hasher {
220            k0: key0,
221            k1: key1,
222            length: 0,
223            state: State {
224                v0: 0,
225                v1: 0,
226                v2: 0,
227                v3: 0,
228            },
229            tail: 0,
230            ntail: 0,
231            _marker: PhantomData,
232        };
233        state.reset();
234        state
235    }
236
237    #[inline]
238    fn reset(&mut self) {
239        self.length = 0;
240        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
241        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
242        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
243        self.state.v3 = self.k1 ^ 0x7465646279746573;
244        self.ntail = 0;
245    }
246
247    // A specialized write function for values with size <= 8.
248    //
249    // The hashing of multi-byte integers depends on endianness. E.g.:
250    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
251    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
252    //
253    // This function does the right thing for little-endian hardware. On
254    // big-endian hardware `x` must be byte-swapped first to give the right
255    // behaviour. After any byte-swapping, the input must be zero-extended to
256    // 64-bits. The caller is responsible for the byte-swapping and
257    // zero-extension.
258    #[inline]
259    fn short_write<T>(&mut self, _x: T, x: u64) {
260        let size = mem::size_of::<T>();
261        self.length += size;
262
263        // The original number must be zero-extended, not sign-extended.
264        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
265
266        // The number of bytes needed to fill `self.tail`.
267        let needed = 8 - self.ntail;
268
269        self.tail |= x << (8 * self.ntail);
270        if size < needed {
271            self.ntail += size;
272            return;
273        }
274
275        // `self.tail` is full, process it.
276        self.state.v3 ^= self.tail;
277        S::c_rounds(&mut self.state);
278        self.state.v0 ^= self.tail;
279
280        self.ntail = size - needed;
281        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
282    }
283
284    #[inline]
285    fn hash(&self, msg: &[u8]) -> u64 {
286        if self.ntail != 0 {
287            let mut hasher: Hasher<S> = Hasher {
288                k0: self.k0,
289                k1: self.k1,
290                length: self.length,
291                state: self.state,
292                tail: self.tail,
293                ntail: self.ntail,
294                _marker: PhantomData,
295            };
296            hasher.write(msg);
297            return hasher.finish();
298        }
299
300        let length = self.length + msg.len();
301        let len = msg.len();
302        let left = len & 0x7;
303        let mut state = self.state;
304        let mut i = 0;
305
306        while i < len - left {
307            let mi = unsafe { load_int_le!(msg, i, u64) };
308
309            state.v3 ^= mi;
310            S::c_rounds(&mut state);
311            state.v0 ^= mi;
312
313            i += 8;
314        }
315
316        let tail = unsafe { u8to64_le(msg, i, left) };
317        Self::finish_with_state(state, length, tail)
318    }
319
320    #[inline]
321    fn finish_with_state(mut state: State, length: usize, tail: u64) -> u64 {
322        let b: u64 = ((length as u64 & 0xff) << 56) | tail;
323
324        state.v3 ^= b;
325        S::c_rounds(&mut state);
326        state.v0 ^= b;
327
328        state.v2 ^= 0xff;
329        S::d_rounds(&mut state);
330
331        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
332    }
333}
334
335impl hash::Hasher for SipHasher {
336    #[inline]
337    fn write(&mut self, msg: &[u8]) {
338        self.0.write(msg)
339    }
340
341    #[inline]
342    fn finish(&self) -> u64 {
343        self.0.finish()
344    }
345
346    #[inline]
347    fn write_usize(&mut self, i: usize) {
348        self.0.write_usize(i);
349    }
350
351    #[inline]
352    fn write_u8(&mut self, i: u8) {
353        self.0.write_u8(i);
354    }
355
356    #[inline]
357    fn write_u16(&mut self, i: u16) {
358        self.0.write_u16(i);
359    }
360
361    #[inline]
362    fn write_u32(&mut self, i: u32) {
363        self.0.write_u32(i);
364    }
365
366    #[inline]
367    fn write_u64(&mut self, i: u64) {
368        self.0.write_u64(i);
369    }
370}
371
372impl hash::Hasher for SipHasher13 {
373    #[inline]
374    fn write(&mut self, msg: &[u8]) {
375        self.hasher.write(msg)
376    }
377
378    #[inline]
379    fn finish(&self) -> u64 {
380        self.hasher.finish()
381    }
382
383    #[inline]
384    fn write_usize(&mut self, i: usize) {
385        self.hasher.write_usize(i);
386    }
387
388    #[inline]
389    fn write_u8(&mut self, i: u8) {
390        self.hasher.write_u8(i);
391    }
392
393    #[inline]
394    fn write_u16(&mut self, i: u16) {
395        self.hasher.write_u16(i);
396    }
397
398    #[inline]
399    fn write_u32(&mut self, i: u32) {
400        self.hasher.write_u32(i);
401    }
402
403    #[inline]
404    fn write_u64(&mut self, i: u64) {
405        self.hasher.write_u64(i);
406    }
407}
408
409impl hash::Hasher for SipHasher24 {
410    #[inline]
411    fn write(&mut self, msg: &[u8]) {
412        self.hasher.write(msg)
413    }
414
415    #[inline]
416    fn finish(&self) -> u64 {
417        self.hasher.finish()
418    }
419
420    #[inline]
421    fn write_usize(&mut self, i: usize) {
422        self.hasher.write_usize(i);
423    }
424
425    #[inline]
426    fn write_u8(&mut self, i: u8) {
427        self.hasher.write_u8(i);
428    }
429
430    #[inline]
431    fn write_u16(&mut self, i: u16) {
432        self.hasher.write_u16(i);
433    }
434
435    #[inline]
436    fn write_u32(&mut self, i: u32) {
437        self.hasher.write_u32(i);
438    }
439
440    #[inline]
441    fn write_u64(&mut self, i: u64) {
442        self.hasher.write_u64(i);
443    }
444}
445
446impl<S: Sip> hash::Hasher for Hasher<S> {
447    #[inline]
448    fn write_usize(&mut self, i: usize) {
449        self.short_write(i, i.to_le() as u64);
450    }
451
452    #[inline]
453    fn write_u8(&mut self, i: u8) {
454        self.short_write(i, i as u64);
455    }
456
457    #[inline]
458    fn write_u16(&mut self, i: u16) {
459        self.short_write(i, i.to_le() as u64);
460    }
461
462    #[inline]
463    fn write_u32(&mut self, i: u32) {
464        self.short_write(i, i.to_le() as u64);
465    }
466
467    #[inline]
468    fn write_u64(&mut self, i: u64) {
469        self.short_write(i, i.to_le());
470    }
471
472    #[inline]
473    fn write(&mut self, msg: &[u8]) {
474        let length = msg.len();
475        self.length += length;
476
477        let mut needed = 0;
478
479        if self.ntail != 0 {
480            needed = 8 - self.ntail;
481            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
482            if length < needed {
483                self.ntail += length;
484                return;
485            } else {
486                self.state.v3 ^= self.tail;
487                S::c_rounds(&mut self.state);
488                self.state.v0 ^= self.tail;
489                self.ntail = 0;
490            }
491        }
492
493        // Buffered tail is now flushed, process new input.
494        let len = length - needed;
495        let left = len & 0x7;
496
497        let mut i = needed;
498        while i < len - left {
499            let mi = unsafe { load_int_le!(msg, i, u64) };
500
501            self.state.v3 ^= mi;
502            S::c_rounds(&mut self.state);
503            self.state.v0 ^= mi;
504
505            i += 8;
506        }
507
508        self.tail = unsafe { u8to64_le(msg, i, left) };
509        self.ntail = left;
510    }
511
512    #[inline]
513    fn finish(&self) -> u64 {
514        Self::finish_with_state(self.state, self.length, self.tail)
515    }
516}
517
518impl<S: Sip> Default for Hasher<S> {
519    /// Creates a `Hasher<S>` with the two initial keys set to 0.
520    #[inline]
521    fn default() -> Hasher<S> {
522        Hasher::new_with_keys(0, 0)
523    }
524}
525
526#[doc(hidden)]
527trait Sip {
528    fn c_rounds(_: &mut State);
529    fn d_rounds(_: &mut State);
530}
531
532#[derive(Debug, Clone, Copy, Default)]
533struct Sip13Rounds;
534
535impl Sip for Sip13Rounds {
536    #[inline]
537    fn c_rounds(state: &mut State) {
538        compress!(state);
539    }
540
541    #[inline]
542    fn d_rounds(state: &mut State) {
543        compress!(state);
544        compress!(state);
545        compress!(state);
546    }
547}
548
549#[derive(Debug, Clone, Copy, Default)]
550struct Sip24Rounds;
551
552impl Sip for Sip24Rounds {
553    #[inline]
554    fn c_rounds(state: &mut State) {
555        compress!(state);
556        compress!(state);
557    }
558
559    #[inline]
560    fn d_rounds(state: &mut State) {
561        compress!(state);
562        compress!(state);
563        compress!(state);
564        compress!(state);
565    }
566}