fst/
map.rs

1use std::fmt;
2use std::io;
3use std::iter::{self, FromIterator};
4
5use crate::automaton::{AlwaysMatch, Automaton};
6use crate::raw;
7pub use crate::raw::IndexedValue;
8use crate::stream::{IntoStreamer, Streamer};
9use crate::Result;
10
11/// Map is a lexicographically ordered map from byte strings to integers.
12///
13/// A `Map` is constructed with the `MapBuilder` type. Alternatively, a `Map`
14/// can be constructed in memory from a lexicographically ordered iterator
15/// of key-value pairs (`Map::from_iter`).
16///
17/// A key feature of `Map` is that it can be serialized to disk compactly. Its
18/// underlying representation is built such that the `Map` can be memory mapped
19/// and searched without necessarily loading the entire map into memory.
20///
21/// It supports most common operations associated with maps, such as key
22/// lookup and search. It also supports set operations on its keys along with
23/// the ability to specify how conflicting values are merged together. Maps
24/// also support range queries and automata based searches (e.g. a regular
25/// expression).
26///
27/// Maps are represented by a finite state transducer where inputs are the keys
28/// and outputs are the values. As such, maps have the following invariants:
29///
30/// 1. Once constructed, a `Map` can never be modified.
31/// 2. Maps must be constructed with lexicographically ordered byte sequences.
32///    There is no restricting on the ordering of values.
33///
34/// # Differences with sets
35///
36/// Maps and sets are represented by the same underlying data structure: the
37/// finite state transducer. The principal difference between them is that
38/// sets always have their output values set to `0`. This has an impact on the
39/// representation size and is reflected in the type system for convenience.
40/// A secondary but subtle difference is that duplicate keys can be added
41/// to a set, but it is an error to do so with maps. That is, a set can have
42/// the same key added sequentially, but a map can't.
43///
44/// # The future
45///
46/// It is regrettable that the output value is fixed to `u64`. Indeed, it is
47/// not necessary, but it was a major simplification in the implementation.
48/// In the future, the value type may become generic to an extent (outputs must
49/// satisfy a basic algebra).
50///
51/// Keys will always be byte strings; however, we may grow more conveniences
52/// around dealing with them (such as a serialization/deserialization step,
53/// although it isn't clear where exactly this should live).
54#[derive(Clone)]
55pub struct Map<D>(raw::Fst<D>);
56
57impl Map<Vec<u8>> {
58    /// Create a `Map` from an iterator of lexicographically ordered byte
59    /// strings and associated values.
60    ///
61    /// If the iterator does not yield unique keys in lexicographic order, then
62    /// an error is returned.
63    ///
64    /// Note that this is a convenience function to build a map in memory.
65    /// To build a map that streams to an arbitrary `io::Write`, use
66    /// `MapBuilder`.
67    pub fn from_iter<K, I>(iter: I) -> Result<Map<Vec<u8>>>
68    where
69        K: AsRef<[u8]>,
70        I: IntoIterator<Item = (K, u64)>,
71    {
72        let mut builder = MapBuilder::memory();
73        builder.extend_iter(iter)?;
74        Map::new(builder.into_inner()?)
75    }
76}
77
78impl<D: AsRef<[u8]>> Map<D> {
79    /// Creates a map from its representation as a raw byte sequence.
80    ///
81    /// This accepts anything that can be cheaply converted to a `&[u8]`. The
82    /// caller is responsible for guaranteeing that the given bytes refer to
83    /// a valid FST. While memory safety will not be violated by invalid input,
84    /// a panic could occur while reading the FST at any point.
85    ///
86    /// # Example
87    ///
88    /// ```no_run
89    /// use fst::Map;
90    ///
91    /// // File written from a build script using MapBuilder.
92    /// # const IGNORE: &str = stringify! {
93    /// static FST: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/map.fst"));
94    /// # };
95    /// # static FST: &[u8] = &[];
96    ///
97    /// let map = Map::new(FST).unwrap();
98    /// ```
99    pub fn new(data: D) -> Result<Map<D>> {
100        raw::Fst::new(data).map(Map)
101    }
102
103    /// Tests the membership of a single key.
104    ///
105    /// # Example
106    ///
107    /// ```rust
108    /// use fst::Map;
109    ///
110    /// let map = Map::from_iter(vec![("a", 1), ("b", 2), ("c", 3)]).unwrap();
111    ///
112    /// assert_eq!(map.contains_key("b"), true);
113    /// assert_eq!(map.contains_key("z"), false);
114    /// ```
115    pub fn contains_key<K: AsRef<[u8]>>(&self, key: K) -> bool {
116        self.0.contains_key(key)
117    }
118
119    /// Retrieves the value associated with a key.
120    ///
121    /// If the key does not exist, then `None` is returned.
122    ///
123    /// # Example
124    ///
125    /// ```rust
126    /// use fst::Map;
127    ///
128    /// let map = Map::from_iter(vec![("a", 1), ("b", 2), ("c", 3)]).unwrap();
129    ///
130    /// assert_eq!(map.get("b"), Some(2));
131    /// assert_eq!(map.get("z"), None);
132    /// ```
133    pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<u64> {
134        self.0.get(key).map(|output| output.value())
135    }
136
137    /// Return a lexicographically ordered stream of all key-value pairs in
138    /// this map.
139    ///
140    /// While this is a stream, it does require heap space proportional to the
141    /// longest key in the map.
142    ///
143    /// If the map is memory mapped, then no further heap space is needed.
144    /// Note though that your operating system may fill your page cache
145    /// (which will cause the resident memory usage of the process to go up
146    /// correspondingly).
147    ///
148    /// # Example
149    ///
150    /// Since streams are not iterators, the traditional `for` loop cannot be
151    /// used. `while let` is useful instead:
152    ///
153    /// ```rust
154    /// use fst::{IntoStreamer, Streamer, Map};
155    ///
156    /// let map = Map::from_iter(vec![("a", 1), ("b", 2), ("c", 3)]).unwrap();
157    /// let mut stream = map.stream();
158    ///
159    /// let mut kvs = vec![];
160    /// while let Some((k, v)) = stream.next() {
161    ///     kvs.push((k.to_vec(), v));
162    /// }
163    /// assert_eq!(kvs, vec![
164    ///     (b"a".to_vec(), 1),
165    ///     (b"b".to_vec(), 2),
166    ///     (b"c".to_vec(), 3),
167    /// ]);
168    /// ```
169    #[inline]
170    pub fn stream(&self) -> Stream<'_> {
171        Stream(self.0.stream())
172    }
173
174    /// Return a lexicographically ordered stream of all keys in this map.
175    ///
176    /// Memory requirements are the same as described on `Map::stream`.
177    ///
178    /// # Example
179    ///
180    /// ```rust
181    /// use fst::{IntoStreamer, Streamer, Map};
182    ///
183    /// let map = Map::from_iter(vec![("a", 1), ("b", 2), ("c", 3)]).unwrap();
184    /// let mut stream = map.keys();
185    ///
186    /// let mut keys = vec![];
187    /// while let Some(k) = stream.next() {
188    ///     keys.push(k.to_vec());
189    /// }
190    /// assert_eq!(keys, vec![b"a", b"b", b"c"]);
191    /// ```
192    #[inline]
193    pub fn keys(&self) -> Keys<'_> {
194        Keys(self.0.stream())
195    }
196
197    /// Return a stream of all values in this map ordered lexicographically
198    /// by each value's corresponding key.
199    ///
200    /// Memory requirements are the same as described on `Map::stream`.
201    ///
202    /// # Example
203    ///
204    /// ```rust
205    /// use fst::{IntoStreamer, Streamer, Map};
206    ///
207    /// let map = Map::from_iter(vec![("a", 1), ("b", 2), ("c", 3)]).unwrap();
208    /// let mut stream = map.values();
209    ///
210    /// let mut values = vec![];
211    /// while let Some(v) = stream.next() {
212    ///     values.push(v);
213    /// }
214    /// assert_eq!(values, vec![1, 2, 3]);
215    /// ```
216    #[inline]
217    pub fn values(&self) -> Values<'_> {
218        Values(self.0.stream())
219    }
220
221    /// Return a builder for range queries.
222    ///
223    /// A range query returns a subset of key-value pairs in this map in a
224    /// range given in lexicographic order.
225    ///
226    /// Memory requirements are the same as described on `Map::stream`.
227    /// Notably, only the keys in the range are read; keys outside the range
228    /// are not.
229    ///
230    /// # Example
231    ///
232    /// Returns only the key-value pairs in the range given.
233    ///
234    /// ```rust
235    /// use fst::{IntoStreamer, Streamer, Map};
236    ///
237    /// let map = Map::from_iter(vec![
238    ///     ("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5),
239    /// ]).unwrap();
240    /// let mut stream = map.range().ge("b").lt("e").into_stream();
241    ///
242    /// let mut kvs = vec![];
243    /// while let Some((k, v)) = stream.next() {
244    ///     kvs.push((k.to_vec(), v));
245    /// }
246    /// assert_eq!(kvs, vec![
247    ///     (b"b".to_vec(), 2),
248    ///     (b"c".to_vec(), 3),
249    ///     (b"d".to_vec(), 4),
250    /// ]);
251    /// ```
252    #[inline]
253    pub fn range(&self) -> StreamBuilder<'_> {
254        StreamBuilder(self.0.range())
255    }
256
257    /// Executes an automaton on the keys of this map.
258    ///
259    /// Note that this returns a `StreamBuilder`, which can be used to
260    /// add a range query to the search (see the `range` method).
261    ///
262    /// Memory requirements are the same as described on `Map::stream`.
263    ///
264    /// # Example
265    ///
266    /// An implementation of regular expressions for `Automaton` is available
267    /// in the `regex-automata` crate with the `fst1` feature enabled, which
268    /// can be used to search maps.
269    ///
270    /// # Example
271    ///
272    /// An implementation of subsequence search for `Automaton` can be used
273    /// to search maps:
274    ///
275    /// ```rust
276    /// use fst::automaton::Subsequence;
277    /// use fst::{IntoStreamer, Streamer, Map};
278    ///
279    /// # fn main() { example().unwrap(); }
280    /// fn example() -> Result<(), Box<dyn std::error::Error>> {
281    ///     let map = Map::from_iter(vec![
282    ///         ("a foo bar", 1),
283    ///         ("foo", 2),
284    ///         ("foo1", 3),
285    ///         ("foo2", 4),
286    ///         ("foo3", 5),
287    ///         ("foobar", 6),
288    ///     ]).unwrap();
289    ///
290    ///     let matcher = Subsequence::new("for");
291    ///     let mut stream = map.search(&matcher).into_stream();
292    ///
293    ///     let mut kvs = vec![];
294    ///     while let Some((k, v)) = stream.next() {
295    ///         kvs.push((String::from_utf8(k.to_vec())?, v));
296    ///     }
297    ///     assert_eq!(kvs, vec![
298    ///         ("a foo bar".to_string(), 1), ("foobar".to_string(), 6),
299    ///     ]);
300    ///
301    ///     Ok(())
302    /// }
303    /// ```
304    pub fn search<A: Automaton>(&self, aut: A) -> StreamBuilder<'_, A> {
305        StreamBuilder(self.0.search(aut))
306    }
307
308    /// Executes an automaton on the keys of this map and yields matching
309    /// keys along with the corresponding matching states in the given
310    /// automaton.
311    ///
312    /// Note that this returns a `StreamWithStateBuilder`, which can be used to
313    /// add a range query to the search (see the `range` method).
314    ///
315    /// Memory requirements are the same as described on `Map::stream`.
316    ///
317    #[cfg_attr(
318        feature = "levenshtein",
319        doc = r##"
320# Example
321
322An implementation of fuzzy search using Levenshtein automata can be used
323to search maps:
324
325```rust
326use fst::automaton::Levenshtein;
327use fst::{IntoStreamer, Streamer, Map};
328
329# fn main() { example().unwrap(); }
330fn example() -> Result<(), Box<dyn std::error::Error>> {
331    let map = Map::from_iter(vec![
332        ("foo", 1),
333        ("foob", 2),
334        ("foobar", 3),
335        ("fozb", 4),
336    ]).unwrap();
337
338    let query = Levenshtein::new("foo", 2)?;
339    let mut stream = map.search_with_state(&query).into_stream();
340
341    let mut kvs = vec![];
342    while let Some((k, v, s)) = stream.next() {
343        kvs.push((String::from_utf8(k.to_vec())?, v, s));
344    }
345    // Currently, there isn't much interesting that you can do with the states.
346    assert_eq!(kvs, vec![
347        ("foo".to_string(), 1, Some(183)),
348        ("foob".to_string(), 2, Some(123)),
349        ("fozb".to_string(), 4, Some(83)),
350    ]);
351
352    Ok(())
353}
354```
355"##
356    )]
357    pub fn search_with_state<A: Automaton>(
358        &self,
359        aut: A,
360    ) -> StreamWithStateBuilder<'_, A> {
361        StreamWithStateBuilder(self.0.search_with_state(aut))
362    }
363
364    /// Returns the number of elements in this map.
365    #[inline]
366    pub fn len(&self) -> usize {
367        self.0.len()
368    }
369
370    /// Returns true if and only if this map is empty.
371    #[inline]
372    pub fn is_empty(&self) -> bool {
373        self.0.is_empty()
374    }
375
376    /// Creates a new map operation with this map added to it.
377    ///
378    /// The `OpBuilder` type can be used to add additional map streams
379    /// and perform set operations like union, intersection, difference and
380    /// symmetric difference on the keys of the map. These set operations also
381    /// allow one to specify how conflicting values are merged in the stream.
382    ///
383    /// # Example
384    ///
385    /// This example demonstrates a union on multiple map streams. Notice that
386    /// the stream returned from the union is not a sequence of key-value
387    /// pairs, but rather a sequence of keys associated with one or more
388    /// values. Namely, a key is associated with each value associated with
389    /// that same key in the all of the streams.
390    ///
391    /// ```rust
392    /// use fst::{Streamer, Map};
393    /// use fst::map::IndexedValue;
394    ///
395    /// let map1 = Map::from_iter(vec![
396    ///     ("a", 1), ("b", 2), ("c", 3),
397    /// ]).unwrap();
398    /// let map2 = Map::from_iter(vec![
399    ///     ("a", 10), ("y", 11), ("z", 12),
400    /// ]).unwrap();
401    ///
402    /// let mut union = map1.op().add(&map2).union();
403    ///
404    /// let mut kvs = vec![];
405    /// while let Some((k, vs)) = union.next() {
406    ///     kvs.push((k.to_vec(), vs.to_vec()));
407    /// }
408    /// assert_eq!(kvs, vec![
409    ///     (b"a".to_vec(), vec![
410    ///         IndexedValue { index: 0, value: 1 },
411    ///         IndexedValue { index: 1, value: 10 },
412    ///     ]),
413    ///     (b"b".to_vec(), vec![IndexedValue { index: 0, value: 2 }]),
414    ///     (b"c".to_vec(), vec![IndexedValue { index: 0, value: 3 }]),
415    ///     (b"y".to_vec(), vec![IndexedValue { index: 1, value: 11 }]),
416    ///     (b"z".to_vec(), vec![IndexedValue { index: 1, value: 12 }]),
417    /// ]);
418    /// ```
419    #[inline]
420    pub fn op(&self) -> OpBuilder<'_> {
421        OpBuilder::new().add(self)
422    }
423
424    /// Returns a reference to the underlying raw finite state transducer.
425    #[inline]
426    pub fn as_fst(&self) -> &raw::Fst<D> {
427        &self.0
428    }
429
430    /// Returns the underlying raw finite state transducer.
431    #[inline]
432    pub fn into_fst(self) -> raw::Fst<D> {
433        self.0
434    }
435
436    /// Maps the underlying data of the fst Map to another data type.
437    ///
438    /// # Example
439    ///
440    /// This example shows that you can map an fst Map based on a `Vec<u8>`
441    /// into an fst Map based on a `Cow<[u8]>`, it can also work with a
442    /// reference counted type (e.g. `Arc`, `Rc`).
443    ///
444    /// ```
445    /// use std::borrow::Cow;
446    ///
447    /// use fst::Map;
448    ///
449    /// let map: Map<Vec<u8>> = Map::from_iter(
450    ///     [("hello", 12), ("world", 42)].iter().cloned(),
451    /// ).unwrap();
452    ///
453    /// let map_on_cow: Map<Cow<[u8]>> = map.map_data(Cow::Owned).unwrap();
454    /// ```
455    #[inline]
456    pub fn map_data<F, T>(self, f: F) -> Result<Map<T>>
457    where
458        F: FnMut(D) -> T,
459        T: AsRef<[u8]>,
460    {
461        self.into_fst().map_data(f).map(Map::from)
462    }
463}
464
465impl Default for Map<Vec<u8>> {
466    #[inline]
467    fn default() -> Map<Vec<u8>> {
468        Map::from_iter(iter::empty::<(&[u8], u64)>()).unwrap()
469    }
470}
471
472impl<D: AsRef<[u8]>> fmt::Debug for Map<D> {
473    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
474        write!(f, "Map([")?;
475        let mut stream = self.stream();
476        let mut first = true;
477        while let Some((k, v)) = stream.next() {
478            if !first {
479                write!(f, ", ")?;
480            }
481            first = false;
482            write!(f, "({}, {})", String::from_utf8_lossy(k), v)?;
483        }
484        write!(f, "])")
485    }
486}
487
488// Construct a map from an Fst object.
489impl<D: AsRef<[u8]>> From<raw::Fst<D>> for Map<D> {
490    #[inline]
491    fn from(fst: raw::Fst<D>) -> Map<D> {
492        Map(fst)
493    }
494}
495
496/// Returns the underlying finite state transducer.
497impl<D: AsRef<[u8]>> AsRef<raw::Fst<D>> for Map<D> {
498    #[inline]
499    fn as_ref(&self) -> &raw::Fst<D> {
500        &self.0
501    }
502}
503
504impl<'m, 'a, D: AsRef<[u8]>> IntoStreamer<'a> for &'m Map<D> {
505    type Item = (&'a [u8], u64);
506    type Into = Stream<'m>;
507
508    #[inline]
509    fn into_stream(self) -> Stream<'m> {
510        Stream(self.0.stream())
511    }
512}
513
514/// A builder for creating a map.
515///
516/// This is not your average everyday builder. It has two important qualities
517/// that make it a bit unique from what you might expect:
518///
519/// 1. All keys must be added in lexicographic order. Adding a key out of order
520///    will result in an error. Additionally, adding a duplicate key will also
521///    result in an error. That is, once a key is associated with a value,
522///    that association can never be modified or deleted.
523/// 2. The representation of a map is streamed to *any* `io::Write` as it is
524///    built. For an in memory representation, this can be a `Vec<u8>`.
525///
526/// Point (2) is especially important because it means that a map can be
527/// constructed *without storing the entire map in memory*. Namely, since it
528/// works with any `io::Write`, it can be streamed directly to a file.
529///
530/// With that said, the builder does use memory, but **memory usage is bounded
531/// to a constant size**. The amount of memory used trades off with the
532/// compression ratio. Currently, the implementation hard codes this trade off
533/// which can result in about 5-20MB of heap usage during construction. (N.B.
534/// Guaranteeing a maximal compression ratio requires memory proportional to
535/// the size of the map, which defeats some of the benefit of streaming
536/// it to disk. In practice, a small bounded amount of memory achieves
537/// close-to-minimal compression ratios.)
538///
539/// The algorithmic complexity of map construction is `O(n)` where `n` is the
540/// number of elements added to the map.
541///
542/// # Example: build in memory
543///
544/// This shows how to use the builder to construct a map in memory. Note that
545/// `Map::from_iter` provides a convenience function that achieves this same
546/// goal without needing to explicitly use `MapBuilder`.
547///
548/// ```rust
549/// use fst::{IntoStreamer, Streamer, Map, MapBuilder};
550///
551/// let mut build = MapBuilder::memory();
552/// build.insert("bruce", 1).unwrap();
553/// build.insert("clarence", 2).unwrap();
554/// build.insert("stevie", 3).unwrap();
555///
556/// // You could also call `finish()` here, but since we're building the map in
557/// // memory, there would be no way to get the `Vec<u8>` back.
558/// let bytes = build.into_inner().unwrap();
559///
560/// // At this point, the map has been constructed, but here's how to read it.
561/// let map = Map::new(bytes).unwrap();
562/// let mut stream = map.into_stream();
563/// let mut kvs = vec![];
564/// while let Some((k, v)) = stream.next() {
565///     kvs.push((k.to_vec(), v));
566/// }
567/// assert_eq!(kvs, vec![
568///     (b"bruce".to_vec(), 1),
569///     (b"clarence".to_vec(), 2),
570///     (b"stevie".to_vec(), 3),
571/// ]);
572/// ```
573///
574/// # Example: stream to file
575///
576/// This shows how to do stream construction of a map to a file.
577///
578/// ```rust,no_run
579/// use std::fs::File;
580/// use std::io;
581///
582/// use fst::{IntoStreamer, Streamer, Map, MapBuilder};
583///
584/// let mut wtr = io::BufWriter::new(File::create("map.fst").unwrap());
585/// let mut build = MapBuilder::new(wtr).unwrap();
586/// build.insert("bruce", 1).unwrap();
587/// build.insert("clarence", 2).unwrap();
588/// build.insert("stevie", 3).unwrap();
589///
590/// // If you want the writer back, then call `into_inner`. Otherwise, this
591/// // will finish construction and call `flush`.
592/// build.finish().unwrap();
593///
594/// // At this point, the map has been constructed, but here's how to read it.
595/// // NOTE: Normally, one would memory map a file instead of reading its
596/// // entire contents on to the heap.
597/// let map = Map::new(std::fs::read("map.fst").unwrap()).unwrap();
598/// let mut stream = map.into_stream();
599/// let mut kvs = vec![];
600/// while let Some((k, v)) = stream.next() {
601///     kvs.push((k.to_vec(), v));
602/// }
603/// assert_eq!(kvs, vec![
604///     (b"bruce".to_vec(), 1),
605///     (b"clarence".to_vec(), 2),
606///     (b"stevie".to_vec(), 3),
607/// ]);
608/// ```
609pub struct MapBuilder<W>(raw::Builder<W>);
610
611impl MapBuilder<Vec<u8>> {
612    /// Create a builder that builds a map in memory.
613    #[inline]
614    pub fn memory() -> MapBuilder<Vec<u8>> {
615        MapBuilder(raw::Builder::memory())
616    }
617
618    /// Finishes the construction of the map and returns it.
619    #[inline]
620    pub fn into_map(self) -> Map<Vec<u8>> {
621        Map(self.0.into_fst())
622    }
623}
624
625impl<W: io::Write> MapBuilder<W> {
626    /// Create a builder that builds a map by writing it to `wtr` in a
627    /// streaming fashion.
628    pub fn new(wtr: W) -> Result<MapBuilder<W>> {
629        raw::Builder::new_type(wtr, 0).map(MapBuilder)
630    }
631
632    /// Insert a new key-value pair into the map.
633    ///
634    /// Keys must be convertible to byte strings. Values must be a `u64`, which
635    /// is a restriction of the current implementation of finite state
636    /// transducers. (Values may one day be expanded to other types.)
637    ///
638    /// If a key is inserted that is less than or equal to any previous key
639    /// added, then an error is returned. Similarly, if there was a problem
640    /// writing to the underlying writer, an error is returned.
641    pub fn insert<K: AsRef<[u8]>>(&mut self, key: K, val: u64) -> Result<()> {
642        self.0.insert(key, val)
643    }
644
645    /// Calls insert on each item in the iterator.
646    ///
647    /// If an error occurred while adding an element, processing is stopped
648    /// and the error is returned.
649    ///
650    /// If a key is inserted that is less than or equal to any previous key
651    /// added, then an error is returned. Similarly, if there was a problem
652    /// writing to the underlying writer, an error is returned.
653    pub fn extend_iter<K, I>(&mut self, iter: I) -> Result<()>
654    where
655        K: AsRef<[u8]>,
656        I: IntoIterator<Item = (K, u64)>,
657    {
658        self.0.extend_iter(
659            iter.into_iter().map(|(k, v)| (k, raw::Output::new(v))),
660        )
661    }
662
663    /// Calls insert on each item in the stream.
664    ///
665    /// Note that unlike `extend_iter`, this is not generic on the items in
666    /// the stream.
667    ///
668    /// If a key is inserted that is less than or equal to any previous key
669    /// added, then an error is returned. Similarly, if there was a problem
670    /// writing to the underlying writer, an error is returned.
671    pub fn extend_stream<'f, I, S>(&mut self, stream: I) -> Result<()>
672    where
673        I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], u64)>,
674        S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], u64)>,
675    {
676        self.0.extend_stream(StreamOutput(stream.into_stream()))
677    }
678
679    /// Finishes the construction of the map and flushes the underlying
680    /// writer. After completion, the data written to `W` may be read using
681    /// one of `Map`'s constructor methods.
682    pub fn finish(self) -> Result<()> {
683        self.0.finish()
684    }
685
686    /// Just like `finish`, except it returns the underlying writer after
687    /// flushing it.
688    pub fn into_inner(self) -> Result<W> {
689        self.0.into_inner()
690    }
691
692    /// Gets a reference to the underlying writer.
693    pub fn get_ref(&self) -> &W {
694        self.0.get_ref()
695    }
696
697    /// Returns the number of bytes written to the underlying writer
698    pub fn bytes_written(&self) -> u64 {
699        self.0.bytes_written()
700    }
701}
702
703/// A lexicographically ordered stream of key-value pairs from a map.
704///
705/// The `A` type parameter corresponds to an optional automaton to filter
706/// the stream. By default, no filtering is done.
707///
708/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
709pub struct Stream<'m, A = AlwaysMatch>(raw::Stream<'m, A>)
710where
711    A: Automaton;
712
713impl<'a, 'm, A: Automaton> Streamer<'a> for Stream<'m, A> {
714    type Item = (&'a [u8], u64);
715
716    fn next(&'a mut self) -> Option<(&'a [u8], u64)> {
717        self.0.next().map(|(key, out)| (key, out.value()))
718    }
719}
720
721impl<'m, A: Automaton> Stream<'m, A> {
722    /// Convert this stream into a vector of byte strings and outputs.
723    ///
724    /// Note that this creates a new allocation for every key in the stream.
725    pub fn into_byte_vec(self) -> Vec<(Vec<u8>, u64)> {
726        self.0.into_byte_vec()
727    }
728
729    /// Convert this stream into a vector of Unicode strings and outputs.
730    ///
731    /// If any key is not valid UTF-8, then iteration on the stream is stopped
732    /// and a UTF-8 decoding error is returned.
733    ///
734    /// Note that this creates a new allocation for every key in the stream.
735    pub fn into_str_vec(self) -> Result<Vec<(String, u64)>> {
736        self.0.into_str_vec()
737    }
738
739    /// Convert this stream into a vector of byte strings.
740    ///
741    /// Note that this creates a new allocation for every key in the stream.
742    pub fn into_byte_keys(self) -> Vec<Vec<u8>> {
743        self.0.into_byte_keys()
744    }
745
746    /// Convert this stream into a vector of Unicode strings.
747    ///
748    /// If any key is not valid UTF-8, then iteration on the stream is stopped
749    /// and a UTF-8 decoding error is returned.
750    ///
751    /// Note that this creates a new allocation for every key in the stream.
752    pub fn into_str_keys(self) -> Result<Vec<String>> {
753        self.0.into_str_keys()
754    }
755
756    /// Convert this stream into a vector of outputs.
757    pub fn into_values(self) -> Vec<u64> {
758        self.0.into_values()
759    }
760}
761
762/// A lexicographically ordered stream of key-value-state triples from a map
763/// and an automaton.
764///
765/// The key-values are from the map while the states are from the automaton.
766///
767/// The `A` type parameter corresponds to an optional automaton to filter
768/// the stream. By default, no filtering is done.
769///
770/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
771pub struct StreamWithState<'m, A = AlwaysMatch>(raw::StreamWithState<'m, A>)
772where
773    A: Automaton;
774
775impl<'a, 'm, A: 'a + Automaton> Streamer<'a> for StreamWithState<'m, A>
776where
777    A::State: Clone,
778{
779    type Item = (&'a [u8], u64, A::State);
780
781    fn next(&'a mut self) -> Option<(&'a [u8], u64, A::State)> {
782        self.0.next().map(|(key, out, state)| (key, out.value(), state))
783    }
784}
785
786/// A lexicographically ordered stream of keys from a map.
787///
788/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
789pub struct Keys<'m>(raw::Stream<'m>);
790
791impl<'a, 'm> Streamer<'a> for Keys<'m> {
792    type Item = &'a [u8];
793
794    #[inline]
795    fn next(&'a mut self) -> Option<&'a [u8]> {
796        self.0.next().map(|(key, _)| key)
797    }
798}
799
800/// A stream of values from a map, lexicographically ordered by each value's
801/// corresponding key.
802///
803/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
804pub struct Values<'m>(raw::Stream<'m>);
805
806impl<'a, 'm> Streamer<'a> for Values<'m> {
807    type Item = u64;
808
809    #[inline]
810    fn next(&'a mut self) -> Option<u64> {
811        self.0.next().map(|(_, out)| out.value())
812    }
813}
814
815/// A builder for constructing range queries on streams.
816///
817/// Once all bounds are set, one should call `into_stream` to get a
818/// `Stream`.
819///
820/// Bounds are not additive. That is, if `ge` is called twice on the same
821/// builder, then the second setting wins.
822///
823/// The `A` type parameter corresponds to an optional automaton to filter
824/// the stream. By default, no filtering is done.
825///
826/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
827pub struct StreamBuilder<'m, A = AlwaysMatch>(raw::StreamBuilder<'m, A>);
828
829impl<'m, A: Automaton> StreamBuilder<'m, A> {
830    /// Specify a greater-than-or-equal-to bound.
831    pub fn ge<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'m, A> {
832        StreamBuilder(self.0.ge(bound))
833    }
834
835    /// Specify a greater-than bound.
836    pub fn gt<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'m, A> {
837        StreamBuilder(self.0.gt(bound))
838    }
839
840    /// Specify a less-than-or-equal-to bound.
841    pub fn le<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'m, A> {
842        StreamBuilder(self.0.le(bound))
843    }
844
845    /// Specify a less-than bound.
846    pub fn lt<T: AsRef<[u8]>>(self, bound: T) -> StreamBuilder<'m, A> {
847        StreamBuilder(self.0.lt(bound))
848    }
849}
850
851impl<'m, 'a, A: Automaton> IntoStreamer<'a> for StreamBuilder<'m, A> {
852    type Item = (&'a [u8], u64);
853    type Into = Stream<'m, A>;
854
855    fn into_stream(self) -> Stream<'m, A> {
856        Stream(self.0.into_stream())
857    }
858}
859
860/// A builder for constructing range queries on streams that include automaton
861/// states.
862///
863/// In general, one should use `StreamBuilder` unless you have a specific need
864/// for accessing the states of the underlying automaton that is being used to
865/// filter this stream.
866///
867/// Once all bounds are set, one should call `into_stream` to get a
868/// `Stream`.
869///
870/// Bounds are not additive. That is, if `ge` is called twice on the same
871/// builder, then the second setting wins.
872///
873/// The `A` type parameter corresponds to an optional automaton to filter
874/// the stream. By default, no filtering is done.
875///
876/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
877pub struct StreamWithStateBuilder<'m, A = AlwaysMatch>(
878    raw::StreamWithStateBuilder<'m, A>,
879);
880
881impl<'m, A: Automaton> StreamWithStateBuilder<'m, A> {
882    /// Specify a greater-than-or-equal-to bound.
883    pub fn ge<T: AsRef<[u8]>>(
884        self,
885        bound: T,
886    ) -> StreamWithStateBuilder<'m, A> {
887        StreamWithStateBuilder(self.0.ge(bound))
888    }
889
890    /// Specify a greater-than bound.
891    pub fn gt<T: AsRef<[u8]>>(
892        self,
893        bound: T,
894    ) -> StreamWithStateBuilder<'m, A> {
895        StreamWithStateBuilder(self.0.gt(bound))
896    }
897
898    /// Specify a less-than-or-equal-to bound.
899    pub fn le<T: AsRef<[u8]>>(
900        self,
901        bound: T,
902    ) -> StreamWithStateBuilder<'m, A> {
903        StreamWithStateBuilder(self.0.le(bound))
904    }
905
906    /// Specify a less-than bound.
907    pub fn lt<T: AsRef<[u8]>>(
908        self,
909        bound: T,
910    ) -> StreamWithStateBuilder<'m, A> {
911        StreamWithStateBuilder(self.0.lt(bound))
912    }
913}
914
915impl<'m, 'a, A: 'a + Automaton> IntoStreamer<'a>
916    for StreamWithStateBuilder<'m, A>
917where
918    A::State: Clone,
919{
920    type Item = (&'a [u8], u64, A::State);
921    type Into = StreamWithState<'m, A>;
922
923    fn into_stream(self) -> StreamWithState<'m, A> {
924        StreamWithState(self.0.into_stream())
925    }
926}
927
928/// A builder for collecting map streams on which to perform set operations
929/// on the keys of maps.
930///
931/// Set operations include intersection, union, difference and symmetric
932/// difference. The result of each set operation is itself a stream that emits
933/// pairs of keys and a sequence of each occurrence of that key in the
934/// participating streams. This information allows one to perform set
935/// operations on maps and customize how conflicting output values are handled.
936///
937/// All set operations work efficiently on an arbitrary number of
938/// streams with memory proportional to the number of streams.
939///
940/// The algorithmic complexity of all set operations is `O(n1 + n2 + n3 + ...)`
941/// where `n1, n2, n3, ...` correspond to the number of elements in each
942/// stream.
943///
944/// The `'m` lifetime parameter refers to the lifetime of the underlying set.
945pub struct OpBuilder<'m>(raw::OpBuilder<'m>);
946
947impl<'m> OpBuilder<'m> {
948    /// Create a new set operation builder.
949    #[inline]
950    pub fn new() -> OpBuilder<'m> {
951        OpBuilder(raw::OpBuilder::new())
952    }
953
954    /// Add a stream to this set operation.
955    ///
956    /// This is useful for a chaining style pattern, e.g.,
957    /// `builder.add(stream1).add(stream2).union()`.
958    ///
959    /// The stream must emit a lexicographically ordered sequence of key-value
960    /// pairs.
961    pub fn add<I, S>(mut self, streamable: I) -> OpBuilder<'m>
962    where
963        I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], u64)>,
964        S: 'm + for<'a> Streamer<'a, Item = (&'a [u8], u64)>,
965    {
966        self.push(streamable);
967        self
968    }
969
970    /// Add a stream to this set operation.
971    ///
972    /// The stream must emit a lexicographically ordered sequence of key-value
973    /// pairs.
974    pub fn push<I, S>(&mut self, streamable: I)
975    where
976        I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], u64)>,
977        S: 'm + for<'a> Streamer<'a, Item = (&'a [u8], u64)>,
978    {
979        self.0.push(StreamOutput(streamable.into_stream()));
980    }
981
982    /// Performs a union operation on all streams that have been added.
983    ///
984    /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The
985    /// first element of the tuple is the byte string key. The second element
986    /// of the tuple is a list of all occurrences of that key in participating
987    /// streams. The `IndexedValue` contains an index and the value associated
988    /// with that key in that stream. The index uniquely identifies each
989    /// stream, which is an integer that is auto-incremented when a stream
990    /// is added to this operation (starting at `0`).
991    ///
992    /// # Example
993    ///
994    /// ```rust
995    /// use fst::{IntoStreamer, Streamer, Map};
996    /// use fst::map::IndexedValue;
997    ///
998    /// let map1 = Map::from_iter(vec![
999    ///     ("a", 1), ("b", 2), ("c", 3),
1000    /// ]).unwrap();
1001    /// let map2 = Map::from_iter(vec![
1002    ///     ("a", 11), ("y", 12), ("z", 13),
1003    /// ]).unwrap();
1004    ///
1005    /// let mut union = map1.op().add(&map2).union();
1006    ///
1007    /// let mut kvs = vec![];
1008    /// while let Some((k, vs)) = union.next() {
1009    ///     kvs.push((k.to_vec(), vs.to_vec()));
1010    /// }
1011    /// assert_eq!(kvs, vec![
1012    ///     (b"a".to_vec(), vec![
1013    ///         IndexedValue { index: 0, value: 1 },
1014    ///         IndexedValue { index: 1, value: 11 },
1015    ///     ]),
1016    ///     (b"b".to_vec(), vec![IndexedValue { index: 0, value: 2 }]),
1017    ///     (b"c".to_vec(), vec![IndexedValue { index: 0, value: 3 }]),
1018    ///     (b"y".to_vec(), vec![IndexedValue { index: 1, value: 12 }]),
1019    ///     (b"z".to_vec(), vec![IndexedValue { index: 1, value: 13 }]),
1020    /// ]);
1021    /// ```
1022    #[inline]
1023    pub fn union(self) -> Union<'m> {
1024        Union(self.0.union())
1025    }
1026
1027    /// Performs an intersection operation on all streams that have been added.
1028    ///
1029    /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The
1030    /// first element of the tuple is the byte string key. The second element
1031    /// of the tuple is a list of all occurrences of that key in participating
1032    /// streams. The `IndexedValue` contains an index and the value associated
1033    /// with that key in that stream. The index uniquely identifies each
1034    /// stream, which is an integer that is auto-incremented when a stream
1035    /// is added to this operation (starting at `0`).
1036    ///
1037    /// # Example
1038    ///
1039    /// ```rust
1040    /// use fst::{IntoStreamer, Streamer, Map};
1041    /// use fst::map::IndexedValue;
1042    ///
1043    /// let map1 = Map::from_iter(vec![
1044    ///     ("a", 1), ("b", 2), ("c", 3),
1045    /// ]).unwrap();
1046    /// let map2 = Map::from_iter(vec![
1047    ///     ("a", 11), ("y", 12), ("z", 13),
1048    /// ]).unwrap();
1049    ///
1050    /// let mut intersection = map1.op().add(&map2).intersection();
1051    ///
1052    /// let mut kvs = vec![];
1053    /// while let Some((k, vs)) = intersection.next() {
1054    ///     kvs.push((k.to_vec(), vs.to_vec()));
1055    /// }
1056    /// assert_eq!(kvs, vec![
1057    ///     (b"a".to_vec(), vec![
1058    ///         IndexedValue { index: 0, value: 1 },
1059    ///         IndexedValue { index: 1, value: 11 },
1060    ///     ]),
1061    /// ]);
1062    /// ```
1063    #[inline]
1064    pub fn intersection(self) -> Intersection<'m> {
1065        Intersection(self.0.intersection())
1066    }
1067
1068    /// Performs a difference operation with respect to the first stream added.
1069    /// That is, this returns a stream of all elements in the first stream
1070    /// that don't exist in any other stream that has been added.
1071    ///
1072    /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The
1073    /// first element of the tuple is the byte string key. The second element
1074    /// of the tuple is a list of all occurrences of that key in participating
1075    /// streams. The `IndexedValue` contains an index and the value associated
1076    /// with that key in that stream. The index uniquely identifies each
1077    /// stream, which is an integer that is auto-incremented when a stream
1078    /// is added to this operation (starting at `0`).
1079    ///
1080    /// While the interface is the same for all the operations combining multiple
1081    /// maps, due to the nature of `difference` there's exactly one `IndexValue`
1082    /// for each yielded value.
1083    ///
1084    /// # Example
1085    ///
1086    /// ```rust
1087    /// use fst::{Streamer, Map};
1088    /// use fst::map::IndexedValue;
1089    ///
1090    /// let map1 = Map::from_iter(vec![
1091    ///     ("a", 1), ("b", 2), ("c", 3),
1092    /// ]).unwrap();
1093    /// let map2 = Map::from_iter(vec![
1094    ///     ("a", 11), ("y", 12), ("z", 13),
1095    /// ]).unwrap();
1096    ///
1097    /// let mut difference = map1.op().add(&map2).difference();
1098    ///
1099    /// let mut kvs = vec![];
1100    /// while let Some((k, vs)) = difference.next() {
1101    ///     kvs.push((k.to_vec(), vs.to_vec()));
1102    /// }
1103    /// assert_eq!(kvs, vec![
1104    ///     (b"b".to_vec(), vec![IndexedValue { index: 0, value: 2 }]),
1105    ///     (b"c".to_vec(), vec![IndexedValue { index: 0, value: 3 }]),
1106    /// ]);
1107    /// ```
1108    #[inline]
1109    pub fn difference(self) -> Difference<'m> {
1110        Difference(self.0.difference())
1111    }
1112
1113    /// Performs a symmetric difference operation on all of the streams that
1114    /// have been added.
1115    ///
1116    /// When there are only two streams, then the keys returned correspond to
1117    /// keys that are in either stream but *not* in both streams.
1118    ///
1119    /// More generally, for any number of streams, keys that occur in an odd
1120    /// number of streams are returned.
1121    ///
1122    /// Note that this returns a stream of `(&[u8], &[IndexedValue])`. The
1123    /// first element of the tuple is the byte string key. The second element
1124    /// of the tuple is a list of all occurrences of that key in participating
1125    /// streams. The `IndexedValue` contains an index and the value associated
1126    /// with that key in that stream. The index uniquely identifies each
1127    /// stream, which is an integer that is auto-incremented when a stream
1128    /// is added to this operation (starting at `0`).
1129    ///
1130    /// # Example
1131    ///
1132    /// ```rust
1133    /// use fst::{IntoStreamer, Streamer, Map};
1134    /// use fst::map::IndexedValue;
1135    ///
1136    /// let map1 = Map::from_iter(vec![
1137    ///     ("a", 1), ("b", 2), ("c", 3),
1138    /// ]).unwrap();
1139    /// let map2 = Map::from_iter(vec![
1140    ///     ("a", 11), ("y", 12), ("z", 13),
1141    /// ]).unwrap();
1142    ///
1143    /// let mut sym_difference = map1.op().add(&map2).symmetric_difference();
1144    ///
1145    /// let mut kvs = vec![];
1146    /// while let Some((k, vs)) = sym_difference.next() {
1147    ///     kvs.push((k.to_vec(), vs.to_vec()));
1148    /// }
1149    /// assert_eq!(kvs, vec![
1150    ///     (b"b".to_vec(), vec![IndexedValue { index: 0, value: 2 }]),
1151    ///     (b"c".to_vec(), vec![IndexedValue { index: 0, value: 3 }]),
1152    ///     (b"y".to_vec(), vec![IndexedValue { index: 1, value: 12 }]),
1153    ///     (b"z".to_vec(), vec![IndexedValue { index: 1, value: 13 }]),
1154    /// ]);
1155    /// ```
1156    #[inline]
1157    pub fn symmetric_difference(self) -> SymmetricDifference<'m> {
1158        SymmetricDifference(self.0.symmetric_difference())
1159    }
1160}
1161
1162impl<'f, I, S> Extend<I> for OpBuilder<'f>
1163where
1164    I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], u64)>,
1165    S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], u64)>,
1166{
1167    fn extend<T>(&mut self, it: T)
1168    where
1169        T: IntoIterator<Item = I>,
1170    {
1171        for stream in it {
1172            self.push(stream);
1173        }
1174    }
1175}
1176
1177impl<'f, I, S> FromIterator<I> for OpBuilder<'f>
1178where
1179    I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], u64)>,
1180    S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], u64)>,
1181{
1182    fn from_iter<T>(it: T) -> OpBuilder<'f>
1183    where
1184        T: IntoIterator<Item = I>,
1185    {
1186        let mut op = OpBuilder::new();
1187        op.extend(it);
1188        op
1189    }
1190}
1191
1192/// A stream of set union over multiple map streams in lexicographic order.
1193///
1194/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
1195pub struct Union<'m>(raw::Union<'m>);
1196
1197impl<'a, 'm> Streamer<'a> for Union<'m> {
1198    type Item = (&'a [u8], &'a [IndexedValue]);
1199
1200    #[inline]
1201    fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> {
1202        self.0.next()
1203    }
1204}
1205
1206/// A stream of set intersection over multiple map streams in lexicographic
1207/// order.
1208///
1209/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
1210pub struct Intersection<'m>(raw::Intersection<'m>);
1211
1212impl<'a, 'm> Streamer<'a> for Intersection<'m> {
1213    type Item = (&'a [u8], &'a [IndexedValue]);
1214
1215    #[inline]
1216    fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> {
1217        self.0.next()
1218    }
1219}
1220
1221/// A stream of set difference over multiple map streams in lexicographic
1222/// order.
1223///
1224/// The difference operation is taken with respect to the first stream and the
1225/// rest of the streams. i.e., All elements in the first stream that do not
1226/// appear in any other streams.
1227///
1228/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
1229pub struct Difference<'m>(raw::Difference<'m>);
1230
1231impl<'a, 'm> Streamer<'a> for Difference<'m> {
1232    type Item = (&'a [u8], &'a [IndexedValue]);
1233
1234    #[inline]
1235    fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> {
1236        self.0.next()
1237    }
1238}
1239
1240/// A stream of set symmetric difference over multiple map streams in
1241/// lexicographic order.
1242///
1243/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
1244pub struct SymmetricDifference<'m>(raw::SymmetricDifference<'m>);
1245
1246impl<'a, 'm> Streamer<'a> for SymmetricDifference<'m> {
1247    type Item = (&'a [u8], &'a [IndexedValue]);
1248
1249    #[inline]
1250    fn next(&'a mut self) -> Option<(&'a [u8], &'a [IndexedValue])> {
1251        self.0.next()
1252    }
1253}
1254
1255/// A specialized stream for mapping map streams (`(&[u8], u64)`) to streams
1256/// used by raw fsts (`(&[u8], Output)`).
1257///
1258/// If this were iterators, we could use `iter::Map`, but doing this on streams
1259/// requires HKT, so we need to write out the monomorphization ourselves.
1260struct StreamOutput<S>(S);
1261
1262impl<'a, S> Streamer<'a> for StreamOutput<S>
1263where
1264    S: Streamer<'a, Item = (&'a [u8], u64)>,
1265{
1266    type Item = (&'a [u8], raw::Output);
1267
1268    fn next(&'a mut self) -> Option<(&'a [u8], raw::Output)> {
1269        self.0.next().map(|(k, v)| (k, raw::Output::new(v)))
1270    }
1271}