fst/raw/
registry.rs

1use crate::raw::build::BuilderNode;
2use crate::raw::{CompiledAddr, NONE_ADDRESS};
3
4#[derive(Debug)]
5pub struct Registry {
6    table: Vec<RegistryCell>,
7    table_size: usize, // number of rows
8    mru_size: usize,   // number of columns
9}
10
11#[derive(Debug)]
12struct RegistryCache<'a> {
13    cells: &'a mut [RegistryCell],
14}
15
16#[derive(Clone, Debug)]
17pub struct RegistryCell {
18    addr: CompiledAddr,
19    node: BuilderNode,
20}
21
22#[derive(Debug)]
23pub enum RegistryEntry<'a> {
24    Found(CompiledAddr),
25    NotFound(&'a mut RegistryCell),
26    Rejected,
27}
28
29impl Registry {
30    pub fn new(table_size: usize, mru_size: usize) -> Registry {
31        let empty_cell = RegistryCell::none();
32        let ncells = table_size.checked_mul(mru_size).unwrap();
33        Registry { table: vec![empty_cell; ncells], table_size, mru_size }
34    }
35
36    pub fn entry<'a>(&'a mut self, node: &BuilderNode) -> RegistryEntry<'a> {
37        if self.table.is_empty() {
38            return RegistryEntry::Rejected;
39        }
40        let bucket = self.hash(node);
41        let start = self.mru_size * bucket;
42        let end = start + self.mru_size;
43        RegistryCache { cells: &mut self.table[start..end] }.entry(node)
44    }
45
46    fn hash(&self, node: &BuilderNode) -> usize {
47        // Basic FNV-1a hash as described:
48        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
49        //
50        // In unscientific experiments, this provides the same compression
51        // as `std::hash::SipHasher` but is much much faster.
52        const FNV_PRIME: u64 = 1099511628211;
53        let mut h = 14695981039346656037;
54        h = (h ^ (node.is_final as u64)).wrapping_mul(FNV_PRIME);
55        h = (h ^ node.final_output.value()).wrapping_mul(FNV_PRIME);
56        for t in &node.trans {
57            h = (h ^ (t.inp as u64)).wrapping_mul(FNV_PRIME);
58            h = (h ^ t.out.value()).wrapping_mul(FNV_PRIME);
59            h = (h ^ (t.addr as u64)).wrapping_mul(FNV_PRIME);
60        }
61        (h as usize) % self.table_size
62    }
63}
64
65impl<'a> RegistryCache<'a> {
66    fn entry(mut self, node: &BuilderNode) -> RegistryEntry<'a> {
67        if self.cells.len() == 1 {
68            let cell = &mut self.cells[0];
69            if !cell.is_none() && &cell.node == node {
70                RegistryEntry::Found(cell.addr)
71            } else {
72                cell.node.clone_from(node);
73                RegistryEntry::NotFound(cell)
74            }
75        } else if self.cells.len() == 2 {
76            let cell1 = &mut self.cells[0];
77            if !cell1.is_none() && &cell1.node == node {
78                return RegistryEntry::Found(cell1.addr);
79            }
80
81            let cell2 = &mut self.cells[1];
82            if !cell2.is_none() && &cell2.node == node {
83                let addr = cell2.addr;
84                self.cells.swap(0, 1);
85                return RegistryEntry::Found(addr);
86            }
87
88            self.cells[1].node.clone_from(node);
89            self.cells.swap(0, 1);
90            RegistryEntry::NotFound(&mut self.cells[0])
91        } else {
92            let find = |c: &RegistryCell| !c.is_none() && &c.node == node;
93            if let Some(i) = self.cells.iter().position(find) {
94                let addr = self.cells[i].addr;
95                self.promote(i); // most recently used
96                RegistryEntry::Found(addr)
97            } else {
98                let last = self.cells.len() - 1;
99                self.cells[last].node.clone_from(node); // discard LRU
100                self.promote(last);
101                RegistryEntry::NotFound(&mut self.cells[0])
102            }
103        }
104    }
105
106    fn promote(&mut self, mut i: usize) {
107        assert!(i < self.cells.len());
108        while i > 0 {
109            self.cells.swap(i - 1, i);
110            i -= 1;
111        }
112    }
113}
114
115impl RegistryCell {
116    fn none() -> RegistryCell {
117        RegistryCell { addr: NONE_ADDRESS, node: BuilderNode::default() }
118    }
119
120    fn is_none(&self) -> bool {
121        self.addr == NONE_ADDRESS
122    }
123
124    pub fn insert(&mut self, addr: CompiledAddr) {
125        self.addr = addr;
126    }
127}
128
129#[cfg(test)]
130mod tests {
131    use super::{Registry, RegistryCache, RegistryCell, RegistryEntry};
132    use crate::raw::build::BuilderNode;
133    use crate::raw::{Output, Transition};
134
135    fn assert_rejected(entry: RegistryEntry) {
136        match entry {
137            RegistryEntry::Rejected => {}
138            entry => panic!("expected rejected entry, got: {:?}", entry),
139        }
140    }
141
142    fn assert_not_found(entry: RegistryEntry) {
143        match entry {
144            RegistryEntry::NotFound(_) => {}
145            entry => panic!("expected nout found entry, got: {:?}", entry),
146        }
147    }
148
149    fn assert_insert_and_found(reg: &mut Registry, bnode: &BuilderNode) {
150        match reg.entry(&bnode) {
151            RegistryEntry::NotFound(cell) => cell.insert(1234),
152            entry => panic!("unexpected not found entry, got: {:?}", entry),
153        }
154        match reg.entry(&bnode) {
155            RegistryEntry::Found(addr) => assert_eq!(addr, 1234),
156            entry => panic!("unexpected found entry, got: {:?}", entry),
157        }
158    }
159
160    #[test]
161    fn empty_is_ok() {
162        let mut reg = Registry::new(0, 0);
163        let bnode = BuilderNode {
164            is_final: false,
165            final_output: Output::zero(),
166            trans: vec![],
167        };
168        assert_rejected(reg.entry(&bnode));
169    }
170
171    #[test]
172    fn one_final_is_ok() {
173        let mut reg = Registry::new(1, 1);
174        let bnode = BuilderNode {
175            is_final: true,
176            final_output: Output::zero(),
177            trans: vec![],
178        };
179        assert_insert_and_found(&mut reg, &bnode);
180    }
181
182    #[test]
183    fn one_with_trans_is_ok() {
184        let mut reg = Registry::new(1, 1);
185        let bnode = BuilderNode {
186            is_final: false,
187            final_output: Output::zero(),
188            trans: vec![Transition {
189                addr: 0,
190                inp: b'a',
191                out: Output::zero(),
192            }],
193        };
194        assert_insert_and_found(&mut reg, &bnode);
195        assert_not_found(
196            reg.entry(&BuilderNode { is_final: true, ..bnode.clone() }),
197        );
198        assert_not_found(reg.entry(&BuilderNode {
199            trans: vec![Transition {
200                addr: 0,
201                inp: b'b',
202                out: Output::zero(),
203            }],
204            ..bnode.clone()
205        }));
206        assert_not_found(reg.entry(&BuilderNode {
207            trans: vec![Transition {
208                addr: 0,
209                inp: b'a',
210                out: Output::new(1),
211            }],
212            ..bnode.clone()
213        }));
214    }
215
216    #[test]
217    fn cache_works() {
218        let mut reg = Registry::new(1, 1);
219
220        let bnode1 = BuilderNode { is_final: true, ..BuilderNode::default() };
221        assert_insert_and_found(&mut reg, &bnode1);
222
223        let bnode2 =
224            BuilderNode { final_output: Output::new(1), ..bnode1.clone() };
225        assert_insert_and_found(&mut reg, &bnode2);
226        assert_not_found(reg.entry(&bnode1));
227    }
228
229    #[test]
230    fn promote() {
231        let bn = BuilderNode::default();
232        let mut bnodes = vec![
233            RegistryCell { addr: 1, node: bn.clone() },
234            RegistryCell { addr: 2, node: bn.clone() },
235            RegistryCell { addr: 3, node: bn.clone() },
236            RegistryCell { addr: 4, node: bn.clone() },
237        ];
238        let mut cache = RegistryCache { cells: &mut bnodes };
239
240        cache.promote(0);
241        assert_eq!(cache.cells[0].addr, 1);
242        assert_eq!(cache.cells[1].addr, 2);
243        assert_eq!(cache.cells[2].addr, 3);
244        assert_eq!(cache.cells[3].addr, 4);
245
246        cache.promote(1);
247        assert_eq!(cache.cells[0].addr, 2);
248        assert_eq!(cache.cells[1].addr, 1);
249        assert_eq!(cache.cells[2].addr, 3);
250        assert_eq!(cache.cells[3].addr, 4);
251
252        cache.promote(3);
253        assert_eq!(cache.cells[0].addr, 4);
254        assert_eq!(cache.cells[1].addr, 2);
255        assert_eq!(cache.cells[2].addr, 1);
256        assert_eq!(cache.cells[3].addr, 3);
257
258        cache.promote(2);
259        assert_eq!(cache.cells[0].addr, 1);
260        assert_eq!(cache.cells[1].addr, 4);
261        assert_eq!(cache.cells[2].addr, 2);
262        assert_eq!(cache.cells[3].addr, 3);
263    }
264}