fst/raw/
node.rs

1use std::cmp;
2use std::fmt;
3use std::io;
4use std::ops::Range;
5
6use crate::bytes;
7use crate::raw::build::BuilderNode;
8use crate::raw::common_inputs::{COMMON_INPUTS, COMMON_INPUTS_INV};
9use crate::raw::{
10    u64_to_usize, CompiledAddr, Output, Transition, EMPTY_ADDRESS,
11};
12
13/// The threshold (in number of transitions) at which an index is created for
14/// a node's transitions. This speeds up lookup time at the expense of FST
15/// size.
16const TRANS_INDEX_THRESHOLD: usize = 32;
17
18/// Node represents a single state in a finite state transducer.
19///
20/// Nodes are very cheap to construct. Notably, they satisfy the `Copy` trait.
21#[derive(Clone, Copy)]
22pub struct Node<'f> {
23    data: &'f [u8],
24    version: u64,
25    state: State,
26    start: CompiledAddr,
27    end: usize,
28    is_final: bool,
29    ntrans: usize,
30    sizes: PackSizes,
31    final_output: Output,
32}
33
34impl<'f> fmt::Debug for Node<'f> {
35    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36        writeln!(f, "NODE@{}", self.start)?;
37        writeln!(f, "  end_addr: {}", self.end)?;
38        writeln!(f, "  size: {} bytes", self.as_slice().len())?;
39        writeln!(f, "  state: {:?}", self.state)?;
40        writeln!(f, "  is_final: {}", self.is_final())?;
41        writeln!(f, "  final_output: {:?}", self.final_output())?;
42        writeln!(f, "  # transitions: {}", self.len())?;
43        writeln!(f, "  transitions:")?;
44        for t in self.transitions() {
45            writeln!(f, "    {:?}", t)?;
46        }
47        Ok(())
48    }
49}
50
51impl<'f> Node<'f> {
52    /// Creates a new note at the address given.
53    ///
54    /// `data` should be a slice to an entire FST.
55    pub(crate) fn new(
56        version: u64,
57        addr: CompiledAddr,
58        data: &[u8],
59    ) -> Node<'_> {
60        let state = State::new(data, addr);
61        match state {
62            State::EmptyFinal => Node {
63                data: &[],
64                version,
65                state: State::EmptyFinal,
66                start: EMPTY_ADDRESS,
67                end: EMPTY_ADDRESS,
68                is_final: true,
69                ntrans: 0,
70                sizes: PackSizes::new(),
71                final_output: Output::zero(),
72            },
73            State::OneTransNext(s) => {
74                let data = &data[..addr + 1];
75                Node {
76                    data,
77                    version,
78                    state,
79                    start: addr,
80                    end: s.end_addr(data),
81                    is_final: false,
82                    sizes: PackSizes::new(),
83                    ntrans: 1,
84                    final_output: Output::zero(),
85                }
86            }
87            State::OneTrans(s) => {
88                let data = &data[..addr + 1];
89                let sizes = s.sizes(data);
90                Node {
91                    data,
92                    version,
93                    state,
94                    start: addr,
95                    end: s.end_addr(data, sizes),
96                    is_final: false,
97                    ntrans: 1,
98                    sizes,
99                    final_output: Output::zero(),
100                }
101            }
102            State::AnyTrans(s) => {
103                let data = &data[..addr + 1];
104                let sizes = s.sizes(data);
105                let ntrans = s.ntrans(data);
106                Node {
107                    data,
108                    version,
109                    state,
110                    start: addr,
111                    end: s.end_addr(version, data, sizes, ntrans),
112                    is_final: s.is_final_state(),
113                    ntrans,
114                    sizes,
115                    final_output: s.final_output(version, data, sizes, ntrans),
116                }
117            }
118        }
119    }
120    /// Returns an iterator over all transitions in this node in lexicographic
121    /// order.
122    #[inline]
123    pub fn transitions<'n>(&'n self) -> Transitions<'f, 'n> {
124        Transitions { node: self, range: 0..self.len() }
125    }
126
127    /// Returns the transition at index `i`.
128    #[inline(always)]
129    pub fn transition(&self, i: usize) -> Transition {
130        // The `inline(always)` annotation on this function appears to
131        // dramatically speed up FST traversal. In particular, measuring the
132        // time it takes to run `fst range something-big.fst` shows almost a 2x
133        // improvement with this single `inline(always)` annotation.
134
135        match self.state {
136            State::OneTransNext(s) => {
137                assert!(i == 0);
138                Transition {
139                    inp: s.input(self),
140                    out: Output::zero(),
141                    addr: s.trans_addr(self),
142                }
143            }
144            State::OneTrans(s) => {
145                assert!(i == 0);
146                Transition {
147                    inp: s.input(self),
148                    out: s.output(self),
149                    addr: s.trans_addr(self),
150                }
151            }
152            State::AnyTrans(s) => Transition {
153                inp: s.input(self, i),
154                out: s.output(self, i),
155                addr: s.trans_addr(self, i),
156            },
157            State::EmptyFinal => panic!("out of bounds"),
158        }
159    }
160
161    /// Returns the transition address of the `i`th transition.
162    #[inline]
163    pub fn transition_addr(&self, i: usize) -> CompiledAddr {
164        match self.state {
165            State::OneTransNext(s) => {
166                assert!(i == 0);
167                s.trans_addr(self)
168            }
169            State::OneTrans(s) => {
170                assert!(i == 0);
171                s.trans_addr(self)
172            }
173            State::AnyTrans(s) => s.trans_addr(self, i),
174            State::EmptyFinal => panic!("out of bounds"),
175        }
176    }
177
178    /// Finds the `i`th transition corresponding to the given input byte.
179    ///
180    /// If no transition for this byte exists, then `None` is returned.
181    #[inline]
182    pub fn find_input(&self, b: u8) -> Option<usize> {
183        match self.state {
184            State::OneTransNext(s) if s.input(self) == b => Some(0),
185            State::OneTransNext(_) => None,
186            State::OneTrans(s) if s.input(self) == b => Some(0),
187            State::OneTrans(_) => None,
188            State::AnyTrans(s) => s.find_input(self, b),
189            State::EmptyFinal => None,
190        }
191    }
192
193    /// If this node is final and has a terminal output value, then it is
194    /// returned. Otherwise, a zero output is returned.
195    #[inline]
196    pub fn final_output(&self) -> Output {
197        self.final_output
198    }
199
200    /// Returns true if and only if this node corresponds to a final or "match"
201    /// state in the finite state transducer.
202    #[inline]
203    pub fn is_final(&self) -> bool {
204        self.is_final
205    }
206
207    /// Returns the number of transitions in this node.
208    ///
209    /// The maximum number of transitions is 256.
210    #[inline]
211    pub fn len(&self) -> usize {
212        self.ntrans
213    }
214
215    /// Returns true if and only if this node has zero transitions.
216    #[inline]
217    pub fn is_empty(&self) -> bool {
218        self.ntrans == 0
219    }
220
221    /// Return the address of this node.
222    #[inline]
223    pub fn addr(&self) -> CompiledAddr {
224        self.start
225    }
226
227    #[doc(hidden)]
228    #[inline]
229    pub fn as_slice(&self) -> &[u8] {
230        &self.data[self.end..]
231    }
232
233    #[doc(hidden)]
234    #[inline]
235    pub fn state(&self) -> &'static str {
236        match self.state {
237            State::OneTransNext(_) => "OTN",
238            State::OneTrans(_) => "OT",
239            State::AnyTrans(_) => "AT",
240            State::EmptyFinal => "EF",
241        }
242    }
243
244    fn compile<W: io::Write>(
245        wtr: W,
246        last_addr: CompiledAddr,
247        addr: CompiledAddr,
248        node: &BuilderNode,
249    ) -> io::Result<()> {
250        assert!(node.trans.len() <= 256);
251        if node.trans.is_empty()
252            && node.is_final
253            && node.final_output.is_zero()
254        {
255            return Ok(());
256        } else if node.trans.len() != 1 || node.is_final {
257            StateAnyTrans::compile(wtr, addr, node)
258        } else {
259            if node.trans[0].addr == last_addr && node.trans[0].out.is_zero() {
260                StateOneTransNext::compile(wtr, addr, node.trans[0].inp)
261            } else {
262                StateOneTrans::compile(wtr, addr, node.trans[0])
263            }
264        }
265    }
266}
267
268impl BuilderNode {
269    pub fn compile_to<W: io::Write>(
270        &self,
271        wtr: W,
272        last_addr: CompiledAddr,
273        addr: CompiledAddr,
274    ) -> io::Result<()> {
275        Node::compile(wtr, last_addr, addr, self)
276    }
277}
278
279#[derive(Clone, Copy, Debug)]
280enum State {
281    OneTransNext(StateOneTransNext),
282    OneTrans(StateOneTrans),
283    AnyTrans(StateAnyTrans),
284    EmptyFinal,
285}
286
287// one trans flag (1), next flag (1), common input
288#[derive(Clone, Copy, Debug)]
289struct StateOneTransNext(u8);
290// one trans flag (1), next flag (0), common input
291#[derive(Clone, Copy, Debug)]
292struct StateOneTrans(u8);
293// one trans flag (0), final flag, # transitions
294#[derive(Clone, Copy, Debug)]
295struct StateAnyTrans(u8);
296
297impl State {
298    fn new(data: &[u8], addr: CompiledAddr) -> State {
299        if addr == EMPTY_ADDRESS {
300            return State::EmptyFinal;
301        }
302        let v = data[addr];
303        match (v & 0b11_000000) >> 6 {
304            0b11 => State::OneTransNext(StateOneTransNext(v)),
305            0b10 => State::OneTrans(StateOneTrans(v)),
306            _ => State::AnyTrans(StateAnyTrans(v)),
307        }
308    }
309}
310
311impl StateOneTransNext {
312    fn compile<W: io::Write>(
313        mut wtr: W,
314        _: CompiledAddr,
315        input: u8,
316    ) -> io::Result<()> {
317        let mut state = StateOneTransNext::new();
318        state.set_common_input(input);
319        if state.common_input().is_none() {
320            wtr.write_all(&[input])?;
321        }
322        wtr.write_all(&[state.0])?;
323        Ok(())
324    }
325
326    #[inline]
327    fn new() -> StateOneTransNext {
328        StateOneTransNext(0b11_000000)
329    }
330
331    #[inline]
332    fn set_common_input(&mut self, input: u8) {
333        self.0 = (self.0 & 0b11_000000) | common_idx(input, 0b111111);
334    }
335
336    #[inline]
337    fn common_input(&self) -> Option<u8> {
338        common_input(self.0 & 0b00_111111)
339    }
340
341    #[inline]
342    fn input_len(&self) -> usize {
343        if self.common_input().is_none() {
344            1
345        } else {
346            0
347        }
348    }
349
350    #[inline]
351    fn end_addr(&self, data: &[u8]) -> usize {
352        data.len() - 1 - self.input_len()
353    }
354
355    #[inline]
356    fn input(&self, node: &Node<'_>) -> u8 {
357        if let Some(inp) = self.common_input() {
358            inp
359        } else {
360            node.data[node.start - 1]
361        }
362    }
363
364    #[inline]
365    fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr {
366        node.end as CompiledAddr - 1
367    }
368}
369
370impl StateOneTrans {
371    fn compile<W: io::Write>(
372        mut wtr: W,
373        addr: CompiledAddr,
374        trans: Transition,
375    ) -> io::Result<()> {
376        let out = trans.out.value();
377        let output_pack_size =
378            if out == 0 { 0 } else { bytes::pack_uint(&mut wtr, out)? };
379        let trans_pack_size = pack_delta(&mut wtr, addr, trans.addr)?;
380
381        let mut pack_sizes = PackSizes::new();
382        pack_sizes.set_output_pack_size(output_pack_size);
383        pack_sizes.set_transition_pack_size(trans_pack_size);
384        wtr.write_all(&[pack_sizes.encode()])?;
385
386        let mut state = StateOneTrans::new();
387        state.set_common_input(trans.inp);
388        if state.common_input().is_none() {
389            wtr.write_all(&[trans.inp])?;
390        }
391        wtr.write_all(&[state.0])?;
392        Ok(())
393    }
394
395    #[inline]
396    fn new() -> StateOneTrans {
397        StateOneTrans(0b10_000000)
398    }
399
400    #[inline]
401    fn set_common_input(&mut self, input: u8) {
402        self.0 = (self.0 & 0b10_000000) | common_idx(input, 0b111111);
403    }
404
405    #[inline]
406    fn common_input(&self) -> Option<u8> {
407        common_input(self.0 & 0b00_111111)
408    }
409
410    #[inline]
411    fn input_len(&self) -> usize {
412        if self.common_input().is_none() {
413            1
414        } else {
415            0
416        }
417    }
418
419    #[inline]
420    fn sizes(&self, data: &[u8]) -> PackSizes {
421        let i = data.len() - 1 - self.input_len() - 1;
422        PackSizes::decode(data[i])
423    }
424
425    #[inline]
426    fn end_addr(&self, data: &[u8], sizes: PackSizes) -> usize {
427        data.len() - 1
428        - self.input_len()
429        - 1 // pack size
430        - sizes.transition_pack_size()
431        - sizes.output_pack_size()
432    }
433
434    #[inline]
435    fn input(&self, node: &Node<'_>) -> u8 {
436        if let Some(inp) = self.common_input() {
437            inp
438        } else {
439            node.data[node.start - 1]
440        }
441    }
442
443    #[inline]
444    fn output(&self, node: &Node<'_>) -> Output {
445        let osize = node.sizes.output_pack_size();
446        if osize == 0 {
447            return Output::zero();
448        }
449        let tsize = node.sizes.transition_pack_size();
450        let i = node.start
451                - self.input_len()
452                - 1 // pack size
453                - tsize - osize;
454        Output::new(bytes::unpack_uint(&node.data[i..], osize as u8))
455    }
456
457    #[inline]
458    fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr {
459        let tsize = node.sizes.transition_pack_size();
460        let i = node.start
461                - self.input_len()
462                - 1 // pack size
463                - tsize;
464        unpack_delta(&node.data[i..], tsize, node.end)
465    }
466}
467
468impl StateAnyTrans {
469    fn compile<W: io::Write>(
470        mut wtr: W,
471        addr: CompiledAddr,
472        node: &BuilderNode,
473    ) -> io::Result<()> {
474        assert!(node.trans.len() <= 256);
475
476        let mut tsize = 0;
477        let mut osize = bytes::pack_size(node.final_output.value());
478        let mut any_outs = !node.final_output.is_zero();
479        for t in &node.trans {
480            tsize = cmp::max(tsize, pack_delta_size(addr, t.addr));
481            osize = cmp::max(osize, bytes::pack_size(t.out.value()));
482            any_outs = any_outs || !t.out.is_zero();
483        }
484
485        let mut pack_sizes = PackSizes::new();
486        if any_outs {
487            pack_sizes.set_output_pack_size(osize);
488        } else {
489            pack_sizes.set_output_pack_size(0);
490        }
491        pack_sizes.set_transition_pack_size(tsize);
492
493        let mut state = StateAnyTrans::new();
494        state.set_final_state(node.is_final);
495        state.set_state_ntrans(node.trans.len() as u8);
496
497        if any_outs {
498            if node.is_final {
499                bytes::pack_uint_in(
500                    &mut wtr,
501                    node.final_output.value(),
502                    osize,
503                )?;
504            }
505            for t in node.trans.iter().rev() {
506                bytes::pack_uint_in(&mut wtr, t.out.value(), osize)?;
507            }
508        }
509        for t in node.trans.iter().rev() {
510            pack_delta_in(&mut wtr, addr, t.addr, tsize)?;
511        }
512        for t in node.trans.iter().rev() {
513            wtr.write_all(&[t.inp])?;
514        }
515        if node.trans.len() > TRANS_INDEX_THRESHOLD {
516            // A value of 255 indicates that no transition exists for the byte
517            // at that index. (Except when there are 256 transitions.) Namely,
518            // any value greater than or equal to the number of transitions in
519            // this node indicates an absent transition.
520            let mut index = [255u8; 256];
521            for (i, t) in node.trans.iter().enumerate() {
522                index[t.inp as usize] = i as u8;
523            }
524            wtr.write_all(&index)?;
525        }
526
527        wtr.write_all(&[pack_sizes.encode()])?;
528        if state.state_ntrans().is_none() {
529            if node.trans.len() == 256 {
530                // 256 can't be represented in a u8, so we abuse the fact that
531                // the # of transitions can never be 1 here, since 1 is always
532                // encoded in the state byte.
533                wtr.write_all(&[1])?;
534            } else {
535                wtr.write_all(&[node.trans.len() as u8])?;
536            }
537        }
538        wtr.write_all(&[state.0])?;
539        Ok(())
540    }
541
542    #[inline]
543    fn new() -> StateAnyTrans {
544        StateAnyTrans(0b00_000000)
545    }
546
547    #[inline]
548    fn set_final_state(&mut self, yes: bool) {
549        if yes {
550            self.0 |= 0b01_000000;
551        }
552    }
553
554    #[inline]
555    fn is_final_state(&self) -> bool {
556        self.0 & 0b01_000000 == 0b01_000000
557    }
558
559    #[inline]
560    fn set_state_ntrans(&mut self, n: u8) {
561        if n <= 0b00_111111 {
562            self.0 = (self.0 & 0b11_000000) | n;
563        }
564    }
565
566    #[inline]
567    fn state_ntrans(&self) -> Option<u8> {
568        let n = self.0 & 0b00_111111;
569        if n == 0 {
570            None
571        } else {
572            Some(n)
573        }
574    }
575
576    #[inline]
577    fn sizes(&self, data: &[u8]) -> PackSizes {
578        let i = data.len() - 1 - self.ntrans_len() - 1;
579        PackSizes::decode(data[i])
580    }
581
582    #[inline]
583    fn total_trans_size(
584        &self,
585        version: u64,
586        sizes: PackSizes,
587        ntrans: usize,
588    ) -> usize {
589        let index_size = self.trans_index_size(version, ntrans);
590        ntrans + (ntrans * sizes.transition_pack_size()) + index_size
591    }
592
593    #[inline]
594    fn trans_index_size(&self, version: u64, ntrans: usize) -> usize {
595        if version >= 2 && ntrans > TRANS_INDEX_THRESHOLD {
596            256
597        } else {
598            0
599        }
600    }
601
602    #[inline]
603    fn ntrans_len(&self) -> usize {
604        if self.state_ntrans().is_none() {
605            1
606        } else {
607            0
608        }
609    }
610
611    #[inline]
612    fn ntrans(&self, data: &[u8]) -> usize {
613        if let Some(n) = self.state_ntrans() {
614            n as usize
615        } else {
616            let n = data[data.len() - 2] as usize;
617            if n == 1 {
618                // "1" is never a normal legal value here, because if there
619                // is only 1 transition, then it is encoded in the state byte.
620                256
621            } else {
622                n
623            }
624        }
625    }
626
627    #[inline]
628    fn final_output(
629        &self,
630        version: u64,
631        data: &[u8],
632        sizes: PackSizes,
633        ntrans: usize,
634    ) -> Output {
635        let osize = sizes.output_pack_size();
636        if osize == 0 || !self.is_final_state() {
637            return Output::zero();
638        }
639        let at = data.len() - 1
640                 - self.ntrans_len()
641                 - 1 // pack size
642                 - self.total_trans_size(version, sizes, ntrans)
643                 - (ntrans * osize) // output values
644                 - osize; // the desired output value
645        Output::new(bytes::unpack_uint(&data[at..], osize as u8))
646    }
647
648    #[inline]
649    fn end_addr(
650        &self,
651        version: u64,
652        data: &[u8],
653        sizes: PackSizes,
654        ntrans: usize,
655    ) -> usize {
656        let osize = sizes.output_pack_size();
657        let final_osize = if !self.is_final_state() { 0 } else { osize };
658        data.len() - 1
659        - self.ntrans_len()
660        - 1 // pack size
661        - self.total_trans_size(version, sizes, ntrans)
662        - (ntrans * osize) // output values
663        - final_osize // final output
664    }
665
666    #[inline]
667    fn trans_addr(&self, node: &Node<'_>, i: usize) -> CompiledAddr {
668        assert!(i < node.ntrans);
669        let tsize = node.sizes.transition_pack_size();
670        let at = node.start
671                 - self.ntrans_len()
672                 - 1 // pack size
673                 - self.trans_index_size(node.version, node.ntrans)
674                 - node.ntrans // inputs
675                 - (i * tsize) // the previous transition addresses
676                 - tsize; // the desired transition address
677        unpack_delta(&node.data[at..], tsize, node.end)
678    }
679
680    #[inline]
681    fn input(&self, node: &Node<'_>, i: usize) -> u8 {
682        let at = node.start
683                 - self.ntrans_len()
684                 - 1 // pack size
685                 - self.trans_index_size(node.version, node.ntrans)
686                 - i
687                 - 1; // the input byte
688        node.data[at]
689    }
690
691    #[inline]
692    fn find_input(&self, node: &Node<'_>, b: u8) -> Option<usize> {
693        if node.version >= 2 && node.ntrans > TRANS_INDEX_THRESHOLD {
694            let start = node.start
695                        - self.ntrans_len()
696                        - 1 // pack size
697                        - self.trans_index_size(node.version, node.ntrans);
698            let i = node.data[start + b as usize] as usize;
699            if i >= node.ntrans {
700                None
701            } else {
702                Some(i)
703            }
704        } else {
705            let start = node.start
706                        - self.ntrans_len()
707                        - 1 // pack size
708                        - node.ntrans; // inputs
709            let end = start + node.ntrans;
710            let inputs = &node.data[start..end];
711            inputs.iter().position(|&b2| b == b2).map(|i| node.ntrans - i - 1)
712        }
713    }
714
715    #[inline]
716    fn output(&self, node: &Node<'_>, i: usize) -> Output {
717        let osize = node.sizes.output_pack_size();
718        if osize == 0 {
719            return Output::zero();
720        }
721        let at = node.start
722                 - self.ntrans_len()
723                 - 1 // pack size
724                 - self.total_trans_size(node.version, node.sizes, node.ntrans)
725                 - (i * osize) // the previous outputs
726                 - osize; // the desired output value
727        Output::new(bytes::unpack_uint(&node.data[at..], osize as u8))
728    }
729}
730
731// high 4 bits is transition address packed size.
732// low 4 bits is output value packed size.
733//
734// `0` is a legal value which means there are no transitions/outputs.
735#[derive(Clone, Copy, Debug)]
736struct PackSizes(u8);
737
738impl PackSizes {
739    #[inline]
740    fn new() -> PackSizes {
741        PackSizes(0)
742    }
743
744    #[inline]
745    fn decode(v: u8) -> PackSizes {
746        PackSizes(v)
747    }
748
749    #[inline]
750    fn encode(&self) -> u8 {
751        self.0
752    }
753
754    #[inline]
755    fn set_transition_pack_size(&mut self, size: u8) {
756        assert!(size <= 8);
757        self.0 = (self.0 & 0b0000_1111) | (size << 4);
758    }
759
760    #[inline]
761    fn transition_pack_size(&self) -> usize {
762        ((self.0 & 0b1111_0000) >> 4) as usize
763    }
764
765    #[inline]
766    fn set_output_pack_size(&mut self, size: u8) {
767        assert!(size <= 8);
768        self.0 = (self.0 & 0b1111_0000) | size;
769    }
770
771    #[inline]
772    fn output_pack_size(&self) -> usize {
773        (self.0 & 0b0000_1111) as usize
774    }
775}
776
777/// An iterator over all transitions in a node.
778///
779/// `'f` is the lifetime of the underlying fst and `'n` is the lifetime of
780/// the underlying `Node`.
781pub struct Transitions<'f, 'n> {
782    node: &'n Node<'f>,
783    range: Range<usize>,
784}
785
786impl<'f, 'n> Iterator for Transitions<'f, 'n> {
787    type Item = Transition;
788
789    #[inline]
790    fn next(&mut self) -> Option<Transition> {
791        self.range.next().map(|i| self.node.transition(i))
792    }
793}
794
795/// common_idx translate a byte to an index in the COMMON_INPUTS_INV array.
796///
797/// I wonder if it would be prudent to store this mapping in the FST itself.
798/// The advantage of doing so would mean that common inputs would reflect the
799/// specific data in the FST. The problem of course is that this table has to
800/// be computed up front, which is pretty much at odds with the streaming
801/// nature of the builder.
802///
803/// Nevertheless, the *caller* may have a priori knowledge that could be
804/// supplied to the builder manually, which could then be embedded in the FST.
805#[inline]
806fn common_idx(input: u8, max: u8) -> u8 {
807    let val = ((COMMON_INPUTS[input as usize] as u32 + 1) % 256) as u8;
808    if val > max {
809        0
810    } else {
811        val
812    }
813}
814
815/// common_input translates a common input index stored in a serialized FST
816/// to the corresponding byte.
817#[inline]
818fn common_input(idx: u8) -> Option<u8> {
819    if idx == 0 {
820        None
821    } else {
822        Some(COMMON_INPUTS_INV[(idx - 1) as usize])
823    }
824}
825
826#[inline]
827fn pack_delta<W: io::Write>(
828    wtr: W,
829    node_addr: CompiledAddr,
830    trans_addr: CompiledAddr,
831) -> io::Result<u8> {
832    let nbytes = pack_delta_size(node_addr, trans_addr);
833    pack_delta_in(wtr, node_addr, trans_addr, nbytes)?;
834    Ok(nbytes)
835}
836
837#[inline]
838fn pack_delta_in<W: io::Write>(
839    wtr: W,
840    node_addr: CompiledAddr,
841    trans_addr: CompiledAddr,
842    nbytes: u8,
843) -> io::Result<()> {
844    let delta_addr = if trans_addr == EMPTY_ADDRESS {
845        EMPTY_ADDRESS
846    } else {
847        node_addr - trans_addr
848    };
849    bytes::pack_uint_in(wtr, delta_addr as u64, nbytes)
850}
851
852#[inline]
853fn pack_delta_size(node_addr: CompiledAddr, trans_addr: CompiledAddr) -> u8 {
854    let delta_addr = if trans_addr == EMPTY_ADDRESS {
855        EMPTY_ADDRESS
856    } else {
857        node_addr - trans_addr
858    };
859    bytes::pack_size(delta_addr as u64)
860}
861
862#[inline]
863fn unpack_delta(
864    slice: &[u8],
865    trans_pack_size: usize,
866    node_addr: usize,
867) -> CompiledAddr {
868    let delta = bytes::unpack_uint(slice, trans_pack_size as u8);
869    let delta_addr = u64_to_usize(delta);
870    if delta_addr == EMPTY_ADDRESS {
871        EMPTY_ADDRESS
872    } else {
873        node_addr - delta_addr
874    }
875}
876
877#[cfg(test)]
878mod tests {
879    use quickcheck::{quickcheck, TestResult};
880
881    use crate::raw::build::BuilderNode;
882    use crate::raw::node::Node;
883    use crate::raw::{Builder, CompiledAddr, Output, Transition, VERSION};
884    use crate::stream::Streamer;
885
886    const NEVER_LAST: CompiledAddr = std::u64::MAX as CompiledAddr;
887
888    #[test]
889    fn prop_emits_inputs() {
890        fn p(mut bs: Vec<Vec<u8>>) -> TestResult {
891            bs.sort();
892            bs.dedup();
893
894            let mut bfst = Builder::memory();
895            for word in &bs {
896                bfst.add(word).unwrap();
897            }
898            let fst = bfst.into_fst();
899            let mut rdr = fst.stream();
900            let mut words = vec![];
901            while let Some(w) = rdr.next() {
902                words.push(w.0.to_owned());
903            }
904            TestResult::from_bool(bs == words)
905        }
906        quickcheck(p as fn(Vec<Vec<u8>>) -> TestResult)
907    }
908
909    fn nodes_equal(compiled: &Node, uncompiled: &BuilderNode) -> bool {
910        println!("{:?}", compiled);
911        assert_eq!(compiled.is_final(), uncompiled.is_final);
912        assert_eq!(compiled.len(), uncompiled.trans.len());
913        assert_eq!(compiled.final_output(), uncompiled.final_output);
914        for (ct, ut) in
915            compiled.transitions().zip(uncompiled.trans.iter().cloned())
916        {
917            assert_eq!(ct.inp, ut.inp);
918            assert_eq!(ct.out, ut.out);
919            assert_eq!(ct.addr, ut.addr);
920        }
921        true
922    }
923
924    fn compile(node: &BuilderNode) -> (CompiledAddr, Vec<u8>) {
925        let mut buf = vec![0; 24];
926        node.compile_to(&mut buf, NEVER_LAST, 24).unwrap();
927        (buf.len() as CompiledAddr - 1, buf)
928    }
929
930    fn roundtrip(bnode: &BuilderNode) -> bool {
931        let (addr, bytes) = compile(bnode);
932        let node = Node::new(VERSION, addr, &bytes);
933        nodes_equal(&node, &bnode)
934    }
935
936    fn trans(addr: CompiledAddr, inp: u8) -> Transition {
937        Transition { inp, out: Output::zero(), addr }
938    }
939
940    #[test]
941    fn bin_no_trans() {
942        let bnode = BuilderNode {
943            is_final: false,
944            final_output: Output::zero(),
945            trans: vec![],
946        };
947        let (addr, buf) = compile(&bnode);
948        let node = Node::new(VERSION, addr, &buf);
949        assert_eq!(node.as_slice().len(), 3);
950        roundtrip(&bnode);
951    }
952
953    #[test]
954    fn bin_one_trans_common() {
955        let bnode = BuilderNode {
956            is_final: false,
957            final_output: Output::zero(),
958            trans: vec![trans(20, b'a')],
959        };
960        let (addr, buf) = compile(&bnode);
961        let node = Node::new(VERSION, addr, &buf);
962        assert_eq!(node.as_slice().len(), 3);
963        roundtrip(&bnode);
964    }
965
966    #[test]
967    fn bin_one_trans_not_common() {
968        let bnode = BuilderNode {
969            is_final: false,
970            final_output: Output::zero(),
971            trans: vec![trans(2, b'\xff')],
972        };
973        let (addr, buf) = compile(&bnode);
974        let node = Node::new(VERSION, addr, &buf);
975        assert_eq!(node.as_slice().len(), 4);
976        roundtrip(&bnode);
977    }
978
979    #[test]
980    fn bin_many_trans() {
981        let bnode = BuilderNode {
982            is_final: false,
983            final_output: Output::zero(),
984            trans: vec![
985                trans(2, b'a'),
986                trans(3, b'b'),
987                trans(4, b'c'),
988                trans(5, b'd'),
989                trans(6, b'e'),
990                trans(7, b'f'),
991            ],
992        };
993        let (addr, buf) = compile(&bnode);
994        let node = Node::new(VERSION, addr, &buf);
995        assert_eq!(node.as_slice().len(), 14);
996        roundtrip(&bnode);
997    }
998
999    #[test]
1000    fn node_max_trans() {
1001        let bnode = BuilderNode {
1002            is_final: false,
1003            final_output: Output::zero(),
1004            trans: (0..256).map(|i| trans(0, i as u8)).collect(),
1005        };
1006        let (addr, buf) = compile(&bnode);
1007        let node = Node::new(VERSION, addr, &buf);
1008        assert_eq!(node.transitions().count(), 256);
1009        assert_eq!(node.len(), node.transitions().count());
1010        roundtrip(&bnode);
1011    }
1012}