fst_no_std/raw/
node.rs

1#[cfg(feature = "std")]
2use core::cmp;
3use core::fmt;
4use core::ops::Range;
5#[cfg(feature = "std")]
6use std::io;
7
8use crate::bytes;
9#[cfg(feature = "std")]
10use crate::raw::build::BuilderNode;
11#[cfg(feature = "std")]
12use crate::raw::common_inputs::COMMON_INPUTS;
13use crate::raw::common_inputs::COMMON_INPUTS_INV;
14use crate::raw::{
15    u64_to_usize, CompiledAddr, Output, Transition, EMPTY_ADDRESS,
16};
17
18/// The threshold (in number of transitions) at which an index is created for
19/// a node's transitions. This speeds up lookup time at the expense of FST
20/// size.
21const TRANS_INDEX_THRESHOLD: usize = 32;
22
23/// Node represents a single state in a finite state transducer.
24///
25/// Nodes are very cheap to construct. Notably, they satisfy the `Copy` trait.
26#[derive(Clone, Copy)]
27pub struct Node<'f> {
28    data: &'f [u8],
29    version: u64,
30    state: State,
31    start: CompiledAddr,
32    end: usize,
33    is_final: bool,
34    ntrans: usize,
35    sizes: PackSizes,
36    final_output: Output,
37}
38
39impl<'f> fmt::Debug for Node<'f> {
40    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41        writeln!(f, "NODE@{}", self.start)?;
42        writeln!(f, "  end_addr: {}", self.end)?;
43        writeln!(f, "  size: {} bytes", self.as_slice().len())?;
44        writeln!(f, "  state: {:?}", self.state)?;
45        writeln!(f, "  is_final: {}", self.is_final())?;
46        writeln!(f, "  final_output: {:?}", self.final_output())?;
47        writeln!(f, "  # transitions: {}", self.len())?;
48        writeln!(f, "  transitions:")?;
49        for t in self.transitions() {
50            writeln!(f, "    {t:?}")?;
51        }
52        Ok(())
53    }
54}
55
56impl<'f> Node<'f> {
57    /// Creates a new note at the address given.
58    ///
59    /// `data` should be a slice to an entire FST.
60    pub(crate) fn new(
61        version: u64,
62        addr: CompiledAddr,
63        data: &[u8],
64    ) -> Node<'_> {
65        let state = State::new(data, addr);
66        match state {
67            State::EmptyFinal => Node {
68                data: &[],
69                version,
70                state: State::EmptyFinal,
71                start: EMPTY_ADDRESS,
72                end: EMPTY_ADDRESS,
73                is_final: true,
74                ntrans: 0,
75                sizes: PackSizes::new(),
76                final_output: Output::zero(),
77            },
78            State::OneTransNext(s) => {
79                let data = &data[..=addr];
80                Node {
81                    data,
82                    version,
83                    state,
84                    start: addr,
85                    end: s.end_addr(data),
86                    is_final: false,
87                    sizes: PackSizes::new(),
88                    ntrans: 1,
89                    final_output: Output::zero(),
90                }
91            }
92            State::OneTrans(s) => {
93                let data = &data[..=addr];
94                let sizes = s.sizes(data);
95                Node {
96                    data,
97                    version,
98                    state,
99                    start: addr,
100                    end: s.end_addr(data, sizes),
101                    is_final: false,
102                    ntrans: 1,
103                    sizes,
104                    final_output: Output::zero(),
105                }
106            }
107            State::AnyTrans(s) => {
108                let data = &data[..=addr];
109                let sizes = s.sizes(data);
110                let ntrans = s.ntrans(data);
111                Node {
112                    data,
113                    version,
114                    state,
115                    start: addr,
116                    end: s.end_addr(version, data, sizes, ntrans),
117                    is_final: s.is_final_state(),
118                    ntrans,
119                    sizes,
120                    final_output: s.final_output(version, data, sizes, ntrans),
121                }
122            }
123        }
124    }
125    /// Returns an iterator over all transitions in this node in lexicographic
126    /// order.
127    #[inline]
128    #[must_use]
129    pub fn transitions<'n>(&'n self) -> Transitions<'f, 'n> {
130        Transitions { node: self, range: 0..self.len() }
131    }
132
133    /// Returns the transition at index `i`.
134    #[inline(always)]
135    #[must_use]
136    pub fn transition(&self, i: usize) -> Transition {
137        // The `inline(always)` annotation on this function appears to
138        // dramatically speed up FST traversal. In particular, measuring the
139        // time it takes to run `fst range something-big.fst` shows almost a 2x
140        // improvement with this single `inline(always)` annotation.
141
142        match self.state {
143            State::OneTransNext(s) => {
144                assert!(i == 0);
145                Transition {
146                    inp: s.input(self),
147                    out: Output::zero(),
148                    addr: s.trans_addr(self),
149                }
150            }
151            State::OneTrans(s) => {
152                assert!(i == 0);
153                Transition {
154                    inp: s.input(self),
155                    out: s.output(self),
156                    addr: s.trans_addr(self),
157                }
158            }
159            State::AnyTrans(s) => Transition {
160                inp: s.input(self, i),
161                out: s.output(self, i),
162                addr: s.trans_addr(self, i),
163            },
164            State::EmptyFinal => panic!("out of bounds"),
165        }
166    }
167
168    /// Returns the transition address of the `i`th transition.
169    #[inline]
170    #[must_use]
171    pub fn transition_addr(&self, i: usize) -> CompiledAddr {
172        match self.state {
173            State::OneTransNext(s) => {
174                assert!(i == 0);
175                s.trans_addr(self)
176            }
177            State::OneTrans(s) => {
178                assert!(i == 0);
179                s.trans_addr(self)
180            }
181            State::AnyTrans(s) => s.trans_addr(self, i),
182            State::EmptyFinal => panic!("out of bounds"),
183        }
184    }
185
186    /// Finds the `i`th transition corresponding to the given input byte.
187    ///
188    /// If no transition for this byte exists, then `None` is returned.
189    #[inline]
190    #[must_use]
191    pub fn find_input(&self, b: u8) -> Option<usize> {
192        match self.state {
193            State::OneTransNext(s) if s.input(self) == b => Some(0),
194            State::OneTransNext(_) => None,
195            State::OneTrans(s) if s.input(self) == b => Some(0),
196            State::OneTrans(_) => None,
197            State::AnyTrans(s) => s.find_input(self, b),
198            State::EmptyFinal => None,
199        }
200    }
201
202    /// If this node is final and has a terminal output value, then it is
203    /// returned. Otherwise, a zero output is returned.
204    #[inline]
205    #[must_use]
206    pub fn final_output(&self) -> Output {
207        self.final_output
208    }
209
210    /// Returns true if and only if this node corresponds to a final or "match"
211    /// state in the finite state transducer.
212    #[inline]
213    #[must_use]
214    pub fn is_final(&self) -> bool {
215        self.is_final
216    }
217
218    /// Returns the number of transitions in this node.
219    ///
220    /// The maximum number of transitions is 256.
221    #[inline]
222    #[must_use]
223    pub fn len(&self) -> usize {
224        self.ntrans
225    }
226
227    /// Returns true if and only if this node has zero transitions.
228    #[inline]
229    #[must_use]
230    pub fn is_empty(&self) -> bool {
231        self.ntrans == 0
232    }
233
234    /// Return the address of this node.
235    #[inline]
236    #[must_use]
237    pub fn addr(&self) -> CompiledAddr {
238        self.start
239    }
240
241    #[doc(hidden)]
242    #[inline]
243    #[must_use]
244    pub fn as_slice(&self) -> &[u8] {
245        &self.data[self.end..]
246    }
247
248    #[doc(hidden)]
249    #[inline]
250    #[must_use]
251    pub fn state(&self) -> &'static str {
252        match self.state {
253            State::OneTransNext(_) => "OTN",
254            State::OneTrans(_) => "OT",
255            State::AnyTrans(_) => "AT",
256            State::EmptyFinal => "EF",
257        }
258    }
259
260    #[cfg(feature = "std")]
261    fn compile<W: io::Write>(
262        wtr: W,
263        last_addr: CompiledAddr,
264        addr: CompiledAddr,
265        node: &BuilderNode,
266    ) -> io::Result<()> {
267        assert!(node.trans.len() <= 256);
268        if node.trans.is_empty()
269            && node.is_final
270            && node.final_output.is_zero()
271        {
272            Ok(())
273        } else if node.trans.len() != 1 || node.is_final {
274            StateAnyTrans::compile(wtr, addr, node)
275        } else if node.trans[0].addr == last_addr
276            && node.trans[0].out.is_zero()
277        {
278            StateOneTransNext::compile(wtr, addr, node.trans[0].inp)
279        } else {
280            StateOneTrans::compile(wtr, addr, node.trans[0])
281        }
282    }
283}
284
285#[cfg(feature = "std")]
286impl BuilderNode {
287    pub fn compile_to<W: io::Write>(
288        &self,
289        wtr: W,
290        last_addr: CompiledAddr,
291        addr: CompiledAddr,
292    ) -> io::Result<()> {
293        Node::compile(wtr, last_addr, addr, self)
294    }
295}
296
297#[derive(Clone, Copy, Debug)]
298enum State {
299    OneTransNext(StateOneTransNext),
300    OneTrans(StateOneTrans),
301    AnyTrans(StateAnyTrans),
302    EmptyFinal,
303}
304
305// one trans flag (1), next flag (1), common input
306#[derive(Clone, Copy, Debug)]
307struct StateOneTransNext(u8);
308// one trans flag (1), next flag (0), common input
309#[derive(Clone, Copy, Debug)]
310struct StateOneTrans(u8);
311// one trans flag (0), final flag, # transitions
312#[derive(Clone, Copy, Debug)]
313struct StateAnyTrans(u8);
314
315impl State {
316    fn new(data: &[u8], addr: CompiledAddr) -> State {
317        if addr == EMPTY_ADDRESS {
318            return State::EmptyFinal;
319        }
320        let v = data[addr];
321        match (v & 0b11_000000) >> 6 {
322            0b11 => State::OneTransNext(StateOneTransNext(v)),
323            0b10 => State::OneTrans(StateOneTrans(v)),
324            _ => State::AnyTrans(StateAnyTrans(v)),
325        }
326    }
327}
328
329impl StateOneTransNext {
330    #[cfg(feature = "std")]
331    fn compile<W: io::Write>(
332        mut wtr: W,
333        _: CompiledAddr,
334        input: u8,
335    ) -> io::Result<()> {
336        let mut state = StateOneTransNext::new();
337        state.set_common_input(input);
338        if state.common_input().is_none() {
339            wtr.write_all(&[input])?;
340        }
341        wtr.write_all(&[state.0])?;
342        Ok(())
343    }
344
345    #[inline]
346    #[cfg(feature = "std")]
347    fn new() -> StateOneTransNext {
348        StateOneTransNext(0b11_000000)
349    }
350
351    #[inline]
352    #[cfg(feature = "std")]
353    fn set_common_input(&mut self, input: u8) {
354        self.0 = (self.0 & 0b11_000000) | common_idx(input, 0b11_1111);
355    }
356
357    #[inline]
358    fn common_input(&self) -> Option<u8> {
359        common_input(self.0 & 0b00_111111)
360    }
361
362    #[inline]
363    fn input_len(&self) -> usize {
364        usize::from(self.common_input().is_none())
365    }
366
367    #[inline]
368    fn end_addr(&self, data: &[u8]) -> usize {
369        data.len() - 1 - self.input_len()
370    }
371
372    #[inline]
373    fn input(&self, node: &Node<'_>) -> u8 {
374        if let Some(inp) = self.common_input() {
375            inp
376        } else {
377            node.data[node.start - 1]
378        }
379    }
380
381    #[inline]
382    fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr {
383        node.end as CompiledAddr - 1
384    }
385}
386
387impl StateOneTrans {
388    #[cfg(feature = "std")]
389    fn compile<W: io::Write>(
390        mut wtr: W,
391        addr: CompiledAddr,
392        trans: Transition,
393    ) -> io::Result<()> {
394        let out = trans.out.value();
395        let output_pack_size =
396            if out == 0 { 0 } else { bytes::pack_uint(&mut wtr, out)? };
397        let trans_pack_size = pack_delta(&mut wtr, addr, trans.addr)?;
398
399        let mut pack_sizes = PackSizes::new();
400        pack_sizes.set_output_pack_size(output_pack_size);
401        pack_sizes.set_transition_pack_size(trans_pack_size);
402        wtr.write_all(&[pack_sizes.encode()])?;
403
404        let mut state = StateOneTrans::new();
405        state.set_common_input(trans.inp);
406        if state.common_input().is_none() {
407            wtr.write_all(&[trans.inp])?;
408        }
409        wtr.write_all(&[state.0])?;
410        Ok(())
411    }
412
413    #[inline]
414    #[cfg(feature = "std")]
415    fn new() -> StateOneTrans {
416        StateOneTrans(0b10_000000)
417    }
418
419    #[inline]
420    #[cfg(feature = "std")]
421    fn set_common_input(&mut self, input: u8) {
422        self.0 = (self.0 & 0b10_000000) | common_idx(input, 0b11_1111);
423    }
424
425    #[inline]
426    fn common_input(&self) -> Option<u8> {
427        common_input(self.0 & 0b00_111111)
428    }
429
430    #[inline]
431    fn input_len(&self) -> usize {
432        usize::from(self.common_input().is_none())
433    }
434
435    #[inline]
436    fn sizes(&self, data: &[u8]) -> PackSizes {
437        let i = data.len() - 1 - self.input_len() - 1;
438        PackSizes::decode(data[i])
439    }
440
441    #[inline]
442    fn end_addr(&self, data: &[u8], sizes: PackSizes) -> usize {
443        data.len() - 1
444        - self.input_len()
445        - 1 // pack size
446        - sizes.transition_pack_size()
447        - sizes.output_pack_size()
448    }
449
450    #[inline]
451    fn input(&self, node: &Node<'_>) -> u8 {
452        if let Some(inp) = self.common_input() {
453            inp
454        } else {
455            node.data[node.start - 1]
456        }
457    }
458
459    #[inline]
460    fn output(&self, node: &Node<'_>) -> Output {
461        let osize = node.sizes.output_pack_size();
462        if osize == 0 {
463            return Output::zero();
464        }
465        let tsize = node.sizes.transition_pack_size();
466        let i = node.start
467                - self.input_len()
468                - 1 // pack size
469                - tsize - osize;
470        Output::new(bytes::unpack_uint(&node.data[i..], osize as u8))
471    }
472
473    #[inline]
474    fn trans_addr(&self, node: &Node<'_>) -> CompiledAddr {
475        let tsize = node.sizes.transition_pack_size();
476        let i = node.start
477                - self.input_len()
478                - 1 // pack size
479                - tsize;
480        unpack_delta(&node.data[i..], tsize, node.end)
481    }
482}
483
484impl StateAnyTrans {
485    #[cfg(feature = "std")]
486    fn compile<W: io::Write>(
487        mut wtr: W,
488        addr: CompiledAddr,
489        node: &BuilderNode,
490    ) -> io::Result<()> {
491        assert!(node.trans.len() <= 256);
492
493        let mut tsize = 0;
494        let mut osize = bytes::pack_size(node.final_output.value());
495        let mut any_outs = !node.final_output.is_zero();
496        for t in &node.trans {
497            tsize = cmp::max(tsize, pack_delta_size(addr, t.addr));
498            osize = cmp::max(osize, bytes::pack_size(t.out.value()));
499            any_outs = any_outs || !t.out.is_zero();
500        }
501
502        let mut pack_sizes = PackSizes::new();
503        if any_outs {
504            pack_sizes.set_output_pack_size(osize);
505        } else {
506            pack_sizes.set_output_pack_size(0);
507        }
508        pack_sizes.set_transition_pack_size(tsize);
509
510        let mut state = StateAnyTrans::new();
511        state.set_final_state(node.is_final);
512        state.set_state_ntrans(node.trans.len() as u8);
513
514        if any_outs {
515            if node.is_final {
516                bytes::pack_uint_in(
517                    &mut wtr,
518                    node.final_output.value(),
519                    osize,
520                )?;
521            }
522            for t in node.trans.iter().rev() {
523                bytes::pack_uint_in(&mut wtr, t.out.value(), osize)?;
524            }
525        }
526        for t in node.trans.iter().rev() {
527            pack_delta_in(&mut wtr, addr, t.addr, tsize)?;
528        }
529        for t in node.trans.iter().rev() {
530            wtr.write_all(&[t.inp])?;
531        }
532        if node.trans.len() > TRANS_INDEX_THRESHOLD {
533            // A value of 255 indicates that no transition exists for the byte
534            // at that index. (Except when there are 256 transitions.) Namely,
535            // any value greater than or equal to the number of transitions in
536            // this node indicates an absent transition.
537            let mut index = [255u8; 256];
538            for (i, t) in node.trans.iter().enumerate() {
539                index[t.inp as usize] = i as u8;
540            }
541            wtr.write_all(&index)?;
542        }
543
544        wtr.write_all(&[pack_sizes.encode()])?;
545        if state.state_ntrans().is_none() {
546            if node.trans.len() == 256 {
547                // 256 can't be represented in a u8, so we abuse the fact that
548                // the # of transitions can never be 1 here, since 1 is always
549                // encoded in the state byte.
550                wtr.write_all(&[1])?;
551            } else {
552                wtr.write_all(&[node.trans.len() as u8])?;
553            }
554        }
555        wtr.write_all(&[state.0])?;
556        Ok(())
557    }
558
559    #[inline]
560    #[cfg(feature = "std")]
561    fn new() -> StateAnyTrans {
562        StateAnyTrans(0b00_000000)
563    }
564
565    #[inline]
566    #[cfg(feature = "std")]
567    fn set_final_state(&mut self, yes: bool) {
568        if yes {
569            self.0 |= 0b01_000000;
570        }
571    }
572
573    #[inline]
574    fn is_final_state(&self) -> bool {
575        self.0 & 0b01_000000 == 0b01_000000
576    }
577
578    #[inline]
579    #[cfg(feature = "std")]
580    fn set_state_ntrans(&mut self, n: u8) {
581        if n <= 0b00_111111 {
582            self.0 = (self.0 & 0b11_000000) | n;
583        }
584    }
585
586    #[inline]
587    fn state_ntrans(&self) -> Option<u8> {
588        let n = self.0 & 0b00_111111;
589        if n == 0 {
590            None
591        } else {
592            Some(n)
593        }
594    }
595
596    #[inline]
597    fn sizes(&self, data: &[u8]) -> PackSizes {
598        let i = data.len() - 1 - self.ntrans_len() - 1;
599        PackSizes::decode(data[i])
600    }
601
602    #[inline]
603    fn total_trans_size(
604        &self,
605        version: u64,
606        sizes: PackSizes,
607        ntrans: usize,
608    ) -> usize {
609        let index_size = self.trans_index_size(version, ntrans);
610        ntrans + (ntrans * sizes.transition_pack_size()) + index_size
611    }
612
613    #[inline]
614    fn trans_index_size(&self, version: u64, ntrans: usize) -> usize {
615        if version >= 2 && ntrans > TRANS_INDEX_THRESHOLD {
616            256
617        } else {
618            0
619        }
620    }
621
622    #[inline]
623    fn ntrans_len(&self) -> usize {
624        usize::from(self.state_ntrans().is_none())
625    }
626
627    #[inline]
628    fn ntrans(&self, data: &[u8]) -> usize {
629        if let Some(n) = self.state_ntrans() {
630            n as usize
631        } else {
632            let n = data[data.len() - 2] as usize;
633            if n == 1 {
634                // "1" is never a normal legal value here, because if there
635                // is only 1 transition, then it is encoded in the state byte.
636                256
637            } else {
638                n
639            }
640        }
641    }
642
643    #[inline]
644    fn final_output(
645        &self,
646        version: u64,
647        data: &[u8],
648        sizes: PackSizes,
649        ntrans: usize,
650    ) -> Output {
651        let osize = sizes.output_pack_size();
652        if osize == 0 || !self.is_final_state() {
653            return Output::zero();
654        }
655        let at = data.len() - 1
656                 - self.ntrans_len()
657                 - 1 // pack size
658                 - self.total_trans_size(version, sizes, ntrans)
659                 - (ntrans * osize) // output values
660                 - osize; // the desired output value
661        Output::new(bytes::unpack_uint(&data[at..], osize as u8))
662    }
663
664    #[inline]
665    fn end_addr(
666        &self,
667        version: u64,
668        data: &[u8],
669        sizes: PackSizes,
670        ntrans: usize,
671    ) -> usize {
672        let osize = sizes.output_pack_size();
673        let final_osize = if !self.is_final_state() { 0 } else { osize };
674        data.len() - 1
675        - self.ntrans_len()
676        - 1 // pack size
677        - self.total_trans_size(version, sizes, ntrans)
678        - (ntrans * osize) // output values
679        - final_osize // final output
680    }
681
682    #[inline]
683    fn trans_addr(&self, node: &Node<'_>, i: usize) -> CompiledAddr {
684        assert!(i < node.ntrans);
685        let tsize = node.sizes.transition_pack_size();
686        let at = node.start
687                 - self.ntrans_len()
688                 - 1 // pack size
689                 - self.trans_index_size(node.version, node.ntrans)
690                 - node.ntrans // inputs
691                 - (i * tsize) // the previous transition addresses
692                 - tsize; // the desired transition address
693        unpack_delta(&node.data[at..], tsize, node.end)
694    }
695
696    #[inline]
697    fn input(&self, node: &Node<'_>, i: usize) -> u8 {
698        let at = node.start
699                 - self.ntrans_len()
700                 - 1 // pack size
701                 - self.trans_index_size(node.version, node.ntrans)
702                 - i
703                 - 1; // the input byte
704        node.data[at]
705    }
706
707    #[inline]
708    fn find_input(&self, node: &Node<'_>, b: u8) -> Option<usize> {
709        if node.version >= 2 && node.ntrans > TRANS_INDEX_THRESHOLD {
710            let start = node.start
711                        - self.ntrans_len()
712                        - 1 // pack size
713                        - self.trans_index_size(node.version, node.ntrans);
714            let i = node.data[start + b as usize] as usize;
715            if i >= node.ntrans {
716                None
717            } else {
718                Some(i)
719            }
720        } else {
721            let start = node.start
722                        - self.ntrans_len()
723                        - 1 // pack size
724                        - node.ntrans; // inputs
725            let end = start + node.ntrans;
726            let inputs = &node.data[start..end];
727            inputs.iter().position(|&b2| b == b2).map(|i| node.ntrans - i - 1)
728        }
729    }
730
731    #[inline]
732    fn output(&self, node: &Node<'_>, i: usize) -> Output {
733        let osize = node.sizes.output_pack_size();
734        if osize == 0 {
735            return Output::zero();
736        }
737        let at = node.start
738                 - self.ntrans_len()
739                 - 1 // pack size
740                 - self.total_trans_size(node.version, node.sizes, node.ntrans)
741                 - (i * osize) // the previous outputs
742                 - osize; // the desired output value
743        Output::new(bytes::unpack_uint(&node.data[at..], osize as u8))
744    }
745}
746
747// high 4 bits is transition address packed size.
748// low 4 bits is output value packed size.
749//
750// `0` is a legal value which means there are no transitions/outputs.
751#[derive(Clone, Copy, Debug)]
752struct PackSizes(u8);
753
754impl PackSizes {
755    #[inline]
756    fn new() -> PackSizes {
757        PackSizes(0)
758    }
759
760    #[inline]
761    fn decode(v: u8) -> PackSizes {
762        PackSizes(v)
763    }
764
765    #[inline]
766    #[cfg(feature = "std")]
767    fn encode(&self) -> u8 {
768        self.0
769    }
770
771    #[inline]
772    #[cfg(feature = "std")]
773    fn set_transition_pack_size(&mut self, size: u8) {
774        assert!(size <= 8);
775        self.0 = (self.0 & 0b0000_1111) | (size << 4);
776    }
777
778    #[inline]
779    fn transition_pack_size(&self) -> usize {
780        ((self.0 & 0b1111_0000) >> 4) as usize
781    }
782
783    #[inline]
784    #[cfg(feature = "std")]
785    fn set_output_pack_size(&mut self, size: u8) {
786        assert!(size <= 8);
787        self.0 = (self.0 & 0b1111_0000) | size;
788    }
789
790    #[inline]
791    fn output_pack_size(&self) -> usize {
792        (self.0 & 0b0000_1111) as usize
793    }
794}
795
796/// An iterator over all transitions in a node.
797///
798/// `'f` is the lifetime of the underlying fst and `'n` is the lifetime of
799/// the underlying `Node`.
800pub struct Transitions<'f, 'n> {
801    node: &'n Node<'f>,
802    range: Range<usize>,
803}
804
805impl<'f, 'n> Iterator for Transitions<'f, 'n> {
806    type Item = Transition;
807
808    #[inline]
809    fn next(&mut self) -> Option<Transition> {
810        self.range.next().map(|i| self.node.transition(i))
811    }
812}
813
814/// `common_idx` translate a byte to an index in the `COMMON_INPUTS_INV` array.
815///
816/// I wonder if it would be prudent to store this mapping in the FST itself.
817/// The advantage of doing so would mean that common inputs would reflect the
818/// specific data in the FST. The problem of course is that this table has to
819/// be computed up front, which is pretty much at odds with the streaming
820/// nature of the builder.
821///
822/// Nevertheless, the *caller* may have a priori knowledge that could be
823/// supplied to the builder manually, which could then be embedded in the FST.
824#[inline]
825#[cfg(feature = "std")]
826fn common_idx(input: u8, max: u8) -> u8 {
827    let val = ((u32::from(COMMON_INPUTS[input as usize]) + 1) % 256) as u8;
828    if val > max {
829        0
830    } else {
831        val
832    }
833}
834
835/// `common_input` translates a common input index stored in a serialized FST
836/// to the corresponding byte.
837#[inline]
838fn common_input(idx: u8) -> Option<u8> {
839    if idx == 0 {
840        None
841    } else {
842        Some(COMMON_INPUTS_INV[(idx - 1) as usize])
843    }
844}
845
846#[inline]
847#[cfg(feature = "std")]
848fn pack_delta<W: io::Write>(
849    wtr: W,
850    node_addr: CompiledAddr,
851    trans_addr: CompiledAddr,
852) -> io::Result<u8> {
853    let nbytes = pack_delta_size(node_addr, trans_addr);
854    pack_delta_in(wtr, node_addr, trans_addr, nbytes)?;
855    Ok(nbytes)
856}
857
858#[inline]
859#[cfg(feature = "std")]
860fn pack_delta_in<W: io::Write>(
861    wtr: W,
862    node_addr: CompiledAddr,
863    trans_addr: CompiledAddr,
864    nbytes: u8,
865) -> io::Result<()> {
866    let delta_addr = if trans_addr == EMPTY_ADDRESS {
867        EMPTY_ADDRESS
868    } else {
869        node_addr - trans_addr
870    };
871    bytes::pack_uint_in(wtr, delta_addr as u64, nbytes)
872}
873
874#[inline]
875#[cfg(feature = "std")]
876fn pack_delta_size(node_addr: CompiledAddr, trans_addr: CompiledAddr) -> u8 {
877    let delta_addr = if trans_addr == EMPTY_ADDRESS {
878        EMPTY_ADDRESS
879    } else {
880        node_addr - trans_addr
881    };
882    bytes::pack_size(delta_addr as u64)
883}
884
885#[inline]
886fn unpack_delta(
887    slice: &[u8],
888    trans_pack_size: usize,
889    node_addr: usize,
890) -> CompiledAddr {
891    let delta = bytes::unpack_uint(slice, trans_pack_size as u8);
892    let delta_addr = u64_to_usize(delta);
893    if delta_addr == EMPTY_ADDRESS {
894        EMPTY_ADDRESS
895    } else {
896        node_addr - delta_addr
897    }
898}
899
900#[cfg(test)]
901mod tests {
902    use quickcheck::{quickcheck, TestResult};
903
904    use crate::raw::build::BuilderNode;
905    use crate::raw::node::Node;
906    use crate::raw::{Builder, CompiledAddr, Output, Transition, VERSION};
907    use crate::stream::Streamer;
908
909    const NEVER_LAST: CompiledAddr = std::u64::MAX as CompiledAddr;
910
911    #[test]
912    fn prop_emits_inputs() {
913        fn p(mut bs: Vec<Vec<u8>>) -> TestResult {
914            bs.sort();
915            bs.dedup();
916
917            let mut bfst = Builder::memory();
918            for word in &bs {
919                bfst.add(word).unwrap();
920            }
921            let fst = bfst.into_fst();
922            let mut rdr = fst.stream();
923            let mut words = vec![];
924            while let Some(w) = rdr.next() {
925                words.push(w.0.to_owned());
926            }
927            TestResult::from_bool(bs == words)
928        }
929        quickcheck(p as fn(Vec<Vec<u8>>) -> TestResult);
930    }
931
932    fn nodes_equal(compiled: &Node, uncompiled: &BuilderNode) -> bool {
933        println!("{compiled:?}");
934        assert_eq!(compiled.is_final(), uncompiled.is_final);
935        assert_eq!(compiled.len(), uncompiled.trans.len());
936        assert_eq!(compiled.final_output(), uncompiled.final_output);
937        for (ct, ut) in
938            compiled.transitions().zip(uncompiled.trans.iter().copied())
939        {
940            assert_eq!(ct.inp, ut.inp);
941            assert_eq!(ct.out, ut.out);
942            assert_eq!(ct.addr, ut.addr);
943        }
944        true
945    }
946
947    fn compile(node: &BuilderNode) -> (CompiledAddr, Vec<u8>) {
948        let mut buf = vec![0; 24];
949        node.compile_to(&mut buf, NEVER_LAST, 24).unwrap();
950        (buf.len() as CompiledAddr - 1, buf)
951    }
952
953    fn roundtrip(bnode: &BuilderNode) -> bool {
954        let (addr, bytes) = compile(bnode);
955        let node = Node::new(VERSION, addr, &bytes);
956        nodes_equal(&node, bnode)
957    }
958
959    fn trans(addr: CompiledAddr, inp: u8) -> Transition {
960        Transition { inp, out: Output::zero(), addr }
961    }
962
963    #[test]
964    fn bin_no_trans() {
965        let bnode = BuilderNode {
966            is_final: false,
967            final_output: Output::zero(),
968            trans: vec![],
969        };
970        let (addr, buf) = compile(&bnode);
971        let node = Node::new(VERSION, addr, &buf);
972        assert_eq!(node.as_slice().len(), 3);
973        roundtrip(&bnode);
974    }
975
976    #[test]
977    fn bin_one_trans_common() {
978        let bnode = BuilderNode {
979            is_final: false,
980            final_output: Output::zero(),
981            trans: vec![trans(20, b'a')],
982        };
983        let (addr, buf) = compile(&bnode);
984        let node = Node::new(VERSION, addr, &buf);
985        assert_eq!(node.as_slice().len(), 3);
986        roundtrip(&bnode);
987    }
988
989    #[test]
990    fn bin_one_trans_not_common() {
991        let bnode = BuilderNode {
992            is_final: false,
993            final_output: Output::zero(),
994            trans: vec![trans(2, b'\xff')],
995        };
996        let (addr, buf) = compile(&bnode);
997        let node = Node::new(VERSION, addr, &buf);
998        assert_eq!(node.as_slice().len(), 4);
999        roundtrip(&bnode);
1000    }
1001
1002    #[test]
1003    fn bin_many_trans() {
1004        let bnode = BuilderNode {
1005            is_final: false,
1006            final_output: Output::zero(),
1007            trans: vec![
1008                trans(2, b'a'),
1009                trans(3, b'b'),
1010                trans(4, b'c'),
1011                trans(5, b'd'),
1012                trans(6, b'e'),
1013                trans(7, b'f'),
1014            ],
1015        };
1016        let (addr, buf) = compile(&bnode);
1017        let node = Node::new(VERSION, addr, &buf);
1018        assert_eq!(node.as_slice().len(), 14);
1019        roundtrip(&bnode);
1020    }
1021
1022    #[test]
1023    fn node_max_trans() {
1024        let bnode = BuilderNode {
1025            is_final: false,
1026            final_output: Output::zero(),
1027            trans: (0..256).map(|i| trans(0, i as u8)).collect(),
1028        };
1029        let (addr, buf) = compile(&bnode);
1030        let node = Node::new(VERSION, addr, &buf);
1031        assert_eq!(node.transitions().count(), 256);
1032        assert_eq!(node.len(), node.transitions().count());
1033        roundtrip(&bnode);
1034    }
1035}