Skip to main content

storage/indexeddb/engines/sqlite/
encoding.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use storage_traits::indexeddb::IndexedDBKeyType;
6
7// Implementation is a port of:
8// https://searchfox.org/firefox-main/rev/55ec080e4a37b7ae1f89267063eccd361cdd232d/dom/indexedDB/Key.cpp#109-187
9
10#[repr(u8)]
11enum KeyType {
12    Number = 0x10,
13    Date = 0x20,
14    String = 0x30,
15    Binary = 0x40,
16    Array = 0x50,
17}
18
19impl From<KeyType> for u8 {
20    fn from(key_type: KeyType) -> Self {
21        key_type as u8
22    }
23}
24
25impl TryFrom<u8> for KeyType {
26    type Error = ();
27
28    fn try_from(value: u8) -> Result<Self, Self::Error> {
29        match value {
30            0x10 => Ok(KeyType::Number),
31            0x20 => Ok(KeyType::Date),
32            0x30 => Ok(KeyType::String),
33            0x40 => Ok(KeyType::Binary),
34            0x50 => Ok(KeyType::Array),
35            _ => Err(()),
36        }
37    }
38}
39
40/// When encoding floats, 64bit IEEE 754 are almost sortable, except that
41/// positive sort lower than negative, and negative sort descending. So we use
42/// the following encoding:
43///
44/// value < 0 ?
45/// (-to64bitInt(value)) :
46/// (to64bitInt(value) | 0x8000000000000000)
47pub fn serialize_number(n: f64) -> [u8; 8] {
48    let bits = n.to_bits();
49    let signbit = 1u64 << 63;
50    let number = if (bits & signbit) != 0 {
51        bits.wrapping_neg()
52    } else {
53        bits | signbit
54    };
55    number.to_be_bytes()
56}
57
58pub fn deserialize_number(bytes: &[u8]) -> Option<f64> {
59    let mut array = [0u8; 8];
60    let len = std::cmp::min(8, bytes.len());
61    array[..len].copy_from_slice(&bytes[..len]);
62    let number = u64::from_be_bytes(array);
63    let signbit = 1u64 << 63;
64    let bits = if (number & signbit) != 0 {
65        number & !signbit
66    } else {
67        number.wrapping_neg()
68    };
69    Some(f64::from_bits(bits))
70}
71
72/// When encoding strings, we use variable-size encoding per the following table
73///
74///  Chars 0         - 7E           are encoded as 0xxxxxxx with 1 added
75///  Chars 7F        - (3FFF+7F)    are encoded as 10xxxxxx xxxxxxxx with 7F subtracted
76///  Chars (3FFF+80) - FFFF         are encoded as 11xxxxxx xxxxxxxx xx000000
77fn encode_stringy<I: Iterator<Item = u16>>(iter: I, type_byte: u8, buffer: &mut Vec<u8>) {
78    buffer.push(type_byte);
79    for val in iter {
80        if val <= 0x7E {
81            buffer.push((val + 1) as u8);
82        } else if val <= 0x3FFF + 0x7F {
83            let c = val.wrapping_sub(0x7F).wrapping_add(0x8000);
84            buffer.push((c >> 8) as u8);
85            buffer.push((c & 0xFF) as u8);
86        } else {
87            let c = ((val as u32) << 6) | 0x00C00000;
88            buffer.push((c >> 16) as u8);
89            buffer.push((c >> 8) as u8);
90            buffer.push(c as u8);
91        }
92    }
93    buffer.push(0);
94}
95
96/// Tracks type offsets for arrays to ensure that we don't have collisions with type bytes. When we hit the limit, we push an extra byte to separate the levels.
97fn internal_serialize(key: &IndexedDBKeyType, type_offset: u8, buffer: &mut Vec<u8>) {
98    match key {
99        IndexedDBKeyType::Number(n) => {
100            buffer.push(KeyType::Number as u8 + type_offset);
101            buffer.extend_from_slice(&serialize_number(*n));
102        },
103        IndexedDBKeyType::Date(n) => {
104            buffer.push(KeyType::Date as u8 + type_offset);
105            buffer.extend_from_slice(&serialize_number(*n));
106        },
107        IndexedDBKeyType::String(s) => {
108            encode_stringy(
109                s.encode_utf16(),
110                KeyType::String as u8 + type_offset,
111                buffer,
112            );
113        },
114        IndexedDBKeyType::Binary(b) => {
115            encode_stringy(
116                b.iter().map(|&x| x as u16),
117                KeyType::Binary as u8 + type_offset,
118                buffer,
119            );
120        },
121        IndexedDBKeyType::Array(arr) => {
122            let mut offset = type_offset + KeyType::Array as u8;
123            if offset == KeyType::Array as u8 * 3 {
124                buffer.push(offset);
125                offset = 0;
126            }
127            for item in arr {
128                internal_serialize(item, offset, buffer);
129                offset = 0;
130            }
131            buffer.push(offset); // Array terminator
132        },
133    }
134}
135
136pub fn serialize(key: &IndexedDBKeyType) -> Vec<u8> {
137    let mut buffer = Vec::new();
138    internal_serialize(key, 0, &mut buffer);
139    while buffer.last() == Some(&0) {
140        buffer.pop();
141    }
142    buffer
143}
144
145fn decode_stringy(buffer: &[u8], pos: &mut usize) -> Option<Vec<u16>> {
146    let mut decoded = Vec::new();
147    while *pos < buffer.len() && buffer[*pos] != 0 {
148        let b = buffer[*pos];
149        if (b & 0x80) == 0 {
150            decoded.push((b - 1) as u16);
151            *pos += 1;
152        } else if (b & 0x40) == 0 {
153            let mut c = (b as u16) << 8;
154            *pos += 1;
155            if *pos < buffer.len() {
156                c |= buffer[*pos] as u16;
157                *pos += 1;
158            }
159            c = c.wrapping_sub(0x8000).wrapping_add(0x7F);
160            decoded.push(c);
161        } else {
162            let mut c = (b as u32) << 10;
163            *pos += 1;
164            if *pos < buffer.len() {
165                c |= (buffer[*pos] as u32) << 2;
166                *pos += 1;
167            }
168            if *pos < buffer.len() {
169                c |= (buffer[*pos] as u32) >> 6;
170                *pos += 1;
171            }
172            decoded.push(c as u16);
173        }
174    }
175    if *pos < buffer.len() && buffer[*pos] == 0 {
176        *pos += 1;
177    }
178    Some(decoded)
179}
180
181fn internal_deserialize(
182    buffer: &[u8],
183    pos: &mut usize,
184    mut type_offset: u8,
185) -> Option<IndexedDBKeyType> {
186    if *pos >= buffer.len() {
187        return None;
188    }
189    let key_type = buffer[*pos];
190    if key_type >= KeyType::Array as u8 + type_offset {
191        let mut arr = Vec::new();
192        type_offset += KeyType::Array as u8;
193        if type_offset == KeyType::Array as u8 * 3 {
194            *pos += 1;
195            type_offset = 0;
196        }
197        while *pos < buffer.len() && buffer[*pos] != type_offset {
198            arr.push(internal_deserialize(buffer, pos, type_offset)?);
199            type_offset = 0;
200        }
201        if *pos < buffer.len() {
202            *pos += 1;
203        }
204        Some(IndexedDBKeyType::Array(arr))
205    } else if key_type == KeyType::String as u8 + type_offset {
206        *pos += 1;
207        let u16s = decode_stringy(buffer, pos)?;
208        Some(IndexedDBKeyType::String(String::from_utf16(&u16s).ok()?))
209    } else if key_type == KeyType::Binary as u8 + type_offset {
210        *pos += 1;
211        let u16s = decode_stringy(buffer, pos)?;
212        let u8s: Option<Vec<u8>> = u16s.into_iter().map(|x| x.try_into().ok()).collect();
213        Some(IndexedDBKeyType::Binary(u8s?))
214    } else if key_type == KeyType::Date as u8 + type_offset {
215        *pos += 1;
216        let bytes_to_copy = std::cmp::min(8, buffer.len() - *pos);
217        let val = deserialize_number(&buffer[*pos..*pos + bytes_to_copy])?;
218        *pos += bytes_to_copy;
219        Some(IndexedDBKeyType::Date(val))
220    } else if key_type == KeyType::Number as u8 + type_offset {
221        *pos += 1;
222        let bytes_to_copy = std::cmp::min(8, buffer.len() - *pos);
223        let val = deserialize_number(&buffer[*pos..*pos + bytes_to_copy])?;
224        *pos += bytes_to_copy;
225        Some(IndexedDBKeyType::Number(val))
226    } else {
227        None
228    }
229}
230
231pub fn deserialize(data: &[u8]) -> Option<IndexedDBKeyType> {
232    let mut pos = 0;
233    internal_deserialize(data, &mut pos, 0)
234}
235
236#[cfg(test)]
237mod tests {
238    use storage_traits::indexeddb::IndexedDBKeyType;
239
240    use super::{deserialize, deserialize_number, serialize, serialize_number};
241
242    #[test]
243    fn test_number_roundtrip() {
244        let numbers = [
245            0.0,
246            -0.0,
247            1.0,
248            -1.0,
249            123.456,
250            -123.456,
251            f64::INFINITY,
252            f64::NEG_INFINITY,
253            f64::NAN,
254            f64::MAX,
255            f64::MIN,
256            f64::MIN_POSITIVE,
257        ];
258        for &number in &numbers {
259            let serialized = serialize_number(number);
260            let deserialized = deserialize_number(&serialized).unwrap();
261            if number.is_nan() {
262                assert!(deserialized.is_nan());
263            } else {
264                assert_eq!(number, deserialized);
265            }
266        }
267    }
268
269    #[test]
270    fn test_roundtrip() {
271        let keys = vec![
272            IndexedDBKeyType::Number(42.0),
273            IndexedDBKeyType::Date(1625077765.0),
274            IndexedDBKeyType::String("hello".to_string()),
275            IndexedDBKeyType::Binary(vec![1, 2, 3, 4]),
276            IndexedDBKeyType::Array(vec![
277                IndexedDBKeyType::Number(1.0),
278                IndexedDBKeyType::String("nested".to_string()),
279            ]),
280        ];
281        for key in &keys {
282            let serialized = serialize(key);
283            let deserialized = deserialize(&serialized)
284                .expect(format!("Failed to deserialize key: {:?}", key).as_str());
285            assert_eq!(key, &deserialized);
286        }
287    }
288
289    #[test]
290    fn test_sorting() {
291        let keys = vec![
292            // Number sorting
293            IndexedDBKeyType::Number(f64::NEG_INFINITY),
294            IndexedDBKeyType::Number(-100.0),
295            IndexedDBKeyType::Number(-1.0),
296            IndexedDBKeyType::Number(-0.0),
297            IndexedDBKeyType::Number(0.0),
298            IndexedDBKeyType::Number(1.0),
299            IndexedDBKeyType::Number(100.0),
300            IndexedDBKeyType::Number(f64::INFINITY),
301            // Date sorting
302            IndexedDBKeyType::Date(0.0),
303            IndexedDBKeyType::Date(100.0),
304            // String sorting
305            IndexedDBKeyType::String("".to_string()),
306            IndexedDBKeyType::String("\0".to_string()),
307            IndexedDBKeyType::String("a".to_string()),
308            IndexedDBKeyType::String("aa".to_string()),
309            IndexedDBKeyType::String("b".to_string()),
310            IndexedDBKeyType::String("ba".to_string()),
311            IndexedDBKeyType::String("c".to_string()),
312            IndexedDBKeyType::String("~".to_string()),
313            // Binary sorting
314            IndexedDBKeyType::Binary(vec![]),
315            IndexedDBKeyType::Binary(vec![0]),
316            IndexedDBKeyType::Binary(vec![1]),
317            IndexedDBKeyType::Binary(vec![1, 0]),
318            IndexedDBKeyType::Binary(vec![1, 1]),
319            IndexedDBKeyType::Binary(vec![2]),
320            IndexedDBKeyType::Binary(vec![255]),
321            // Array sorting
322            IndexedDBKeyType::Array(vec![]),
323            IndexedDBKeyType::Array(vec![IndexedDBKeyType::Number(0.0)]),
324            IndexedDBKeyType::Array(vec![IndexedDBKeyType::Number(1.0)]),
325            IndexedDBKeyType::Array(vec![
326                IndexedDBKeyType::Number(1.0),
327                IndexedDBKeyType::Number(2.0),
328            ]),
329            IndexedDBKeyType::Array(vec![IndexedDBKeyType::String("a".to_string())]),
330        ];
331
332        let serialized: Vec<Vec<u8>> = keys.iter().map(serialize).collect();
333        let mut sorted_serialized = serialized.clone();
334        sorted_serialized.sort();
335
336        // The bytes should be byte-wise sortable mirroring the original sort order
337        assert_eq!(serialized, sorted_serialized);
338    }
339}