fst_no_std/raw/
mod.rs

1/*!
2Operations on raw finite state transducers.
3
4This sub-module exposes the guts of a finite state transducer. Many parts of
5it, such as construction and traversal, are mirrored in the `set` and `map`
6sub-modules. Other parts of it, such as direct access to nodes and transitions
7in the transducer, do not have any analog.
8
9# Overview of types
10
11`Fst` is a read only interface to pre-constructed finite state transducers.
12`Node` is a read only interface to a single node in a transducer. `Builder` is
13used to create new finite state transducers. (Once a transducer is created, it
14can never be modified.) `Stream` is a stream of all inputs and outputs in a
15transducer. `StreamBuilder` builds range queries. `OpBuilder` collects streams
16and executes set operations like `union` or `intersection` on them with the
17option of specifying a merge strategy for output values.
18
19Most of the rest of the types are streams from set operations.
20*/
21use core::cmp;
22use core::fmt;
23
24#[cfg(feature = "alloc")]
25use crate::automaton::{AlwaysMatch, Automaton};
26use crate::bytes;
27use crate::error::Result;
28#[cfg(feature = "alloc")]
29use crate::stream::{IntoStreamer, Streamer};
30
31#[cfg(feature = "std")]
32pub use crate::raw::build::Builder;
33pub use crate::raw::error::Error;
34pub use crate::raw::node::{Node, Transitions};
35pub use crate::raw::ops::IndexedValue;
36#[cfg(feature = "alloc")]
37pub use crate::raw::ops::{
38    Difference, Intersection, OpBuilder, SymmetricDifference, Union,
39};
40#[cfg(feature = "alloc")]
41use alloc::{borrow::ToOwned, string::String, vec, vec::Vec};
42
43#[cfg(feature = "std")]
44mod build;
45mod common_inputs;
46#[cfg(feature = "std")]
47mod counting_writer;
48mod crc32;
49mod crc32_table;
50mod error;
51mod node;
52mod ops;
53mod registry;
54mod registry_minimal;
55#[cfg(test)]
56mod tests;
57
58/// The API version of this crate.
59///
60/// This version number is written to every finite state transducer created by
61/// this crate. When a finite state transducer is read, its version number is
62/// checked against this value.
63///
64/// Currently, any version mismatch results in an error. Fixing this requires
65/// regenerating the finite state transducer or switching to a version of this
66/// crate that is compatible with the serialized transducer. This particular
67/// behavior may be relaxed in future versions.
68pub const VERSION: u64 = 3;
69
70/// A sentinel value used to indicate an empty final state.
71const EMPTY_ADDRESS: CompiledAddr = 0;
72
73/// A sentinel value used to indicate an invalid state.
74///
75/// This is never the address of a node in a serialized transducer.
76const NONE_ADDRESS: CompiledAddr = 1;
77
78/// `FstType` is a convention used to indicate the type of the underlying
79/// transducer.
80///
81/// This crate reserves the range 0-255 (inclusive) but currently leaves the
82/// meaning of 0-255 unspecified.
83pub type FstType = u64;
84
85/// `CompiledAddr` is the type used to address nodes in a finite state
86/// transducer.
87///
88/// It is most useful as a pointer to nodes. It can be used in the `Fst::node`
89/// method to resolve the pointer.
90pub type CompiledAddr = usize;
91
92/// An acyclic deterministic finite state transducer.
93///
94/// # How does it work?
95///
96/// The short answer: it's just like a prefix trie, which compresses keys
97/// based only on their prefixes, except that a automaton/transducer also
98/// compresses suffixes.
99///
100/// The longer answer is that keys in an automaton are stored only in the
101/// transitions from one state to another. A key can be acquired by tracing
102/// a path from the root of the automaton to any match state. The inputs along
103/// each transition are concatenated. Once a match state is reached, the
104/// concatenation of inputs up until that point corresponds to a single key.
105///
106/// But why is it called a transducer instead of an automaton? A finite state
107/// transducer is just like a finite state automaton, except that it has output
108/// transitions in addition to input transitions. Namely, the value associated
109/// with any particular key is determined by summing the outputs along every
110/// input transition that leads to the key's corresponding match state.
111///
112/// This is best demonstrated with a couple images. First, let's ignore the
113/// "transducer" aspect and focus on a plain automaton.
114///
115/// Consider that your keys are abbreviations of some of the months in the
116/// Gregorian calendar:
117///
118/// ```plain
119/// jan
120/// feb
121/// mar
122/// may
123/// jun
124/// jul
125/// ```
126///
127/// The corresponding automaton that stores all of these as keys looks like
128/// this:
129///
130/// ![finite state automaton](https://burntsushi.net/stuff/months-set.png)
131///
132/// Notice here how the prefix and suffix of `jan` and `jun` are shared.
133/// Similarly, the prefixes of `jun` and `jul` are shared and the prefixes
134/// of `mar` and `may` are shared.
135///
136/// All of the keys from this automaton can be enumerated in lexicographic
137/// order by following every transition from each node in lexicographic
138/// order. Since it is acyclic, the procedure will terminate.
139///
140/// A key can be found by tracing it through the transitions in the automaton.
141/// For example, the key `aug` is known not to be in the automaton by only
142/// visiting the root state (because there is no `a` transition). For another
143/// example, the key `jax` is known not to be in the set only after moving
144/// through the transitions for `j` and `a`. Namely, after those transitions
145/// are followed, there are no transitions for `x`.
146///
147/// Notice here that looking up a key is proportional the length of the key
148/// itself. Namely, lookup time is not affected by the number of keys in the
149/// automaton!
150///
151/// Additionally, notice that the automaton exploits the fact that many keys
152/// share common prefixes and suffixes. For example, `jun` and `jul` are
153/// represented with no more states than would be required to represent either
154/// one on its own. Instead, the only change is a single extra transition. This
155/// is a form of compression and is key to how the automatons produced by this
156/// crate are so small.
157///
158/// Let's move on to finite state transducers. Consider the same set of keys
159/// as above, but let's assign their numeric month values:
160///
161/// ```plain
162/// jan,1
163/// feb,2
164/// mar,3
165/// may,5
166/// jun,6
167/// jul,7
168/// ```
169///
170/// The corresponding transducer looks very similar to the automaton above,
171/// except outputs have been added to some of the transitions:
172///
173/// ![finite state transducer](https://burntsushi.net/stuff/months-map.png)
174///
175/// All of the operations with a transducer are the same as described above
176/// for automatons. Additionally, the same compression techniques are used:
177/// common prefixes and suffixes in keys are exploited.
178///
179/// The key difference is that some transitions have been given an output.
180/// As one follows input transitions, one must sum the outputs as they
181/// are seen. (A transition with no output represents the additive identity,
182/// or `0` in this case.) For example, when looking up `feb`, the transition
183/// `f` has output `2`, the transition `e` has output `0`, and the transition
184/// `b` also has output `0`. The sum of these is `2`, which is exactly the
185/// value we associated with `feb`.
186///
187/// For another more interesting example, consider `jul`. The `j` transition
188/// has output `1`, the `u` transition has output `5` and the `l` transition
189/// has output `1`. Summing these together gets us `7`, which is again the
190/// correct value associated with `jul`. Notice that if we instead looked up
191/// the `jun` key, then the `n` transition would be followed instead of the
192/// `l` transition, which has no output. Therefore, the `jun` key equals
193/// `1+5+0=6`.
194///
195/// The trick to transducers is that there exists a unique path through the
196/// transducer for every key, and its outputs are stored appropriately along
197/// this path such that the correct value is returned when they are all summed
198/// together. This process also enables the data that makes up each value to be
199/// shared across many values in the transducer in exactly the same way that
200/// keys are shared. This is yet another form of compression!
201///
202/// # Bonus: a billion strings
203///
204/// The amount of compression one can get from automata can be absolutely
205/// ridiuclous. Consider the particular case of storing all billion strings
206/// in the range `0000000001-1000000000`, e.g.,
207///
208/// ```plain
209/// 0000000001
210/// 0000000002
211/// ...
212/// 0000000100
213/// 0000000101
214/// ...
215/// 0999999999
216/// 1000000000
217/// ```
218///
219/// The corresponding automaton looks like this:
220///
221/// ![finite state automaton - one billion strings](https://burntsushi.net/stuff/one-billion.png)
222///
223/// Indeed, the on disk size of this automaton is a mere **251 bytes**.
224///
225/// Of course, this is a bit of a pathological best case, but it does serve
226/// to show how good compression can be in the optimal case.
227///
228/// Also, check out the
229/// [corresponding transducer](https://burntsushi.net/stuff/one-billion-map.svg)
230/// that maps each string to its integer value. It's a bit bigger, but still
231/// only takes up **896 bytes** of space on disk. This demonstrates that
232/// output values are also compressible.
233///
234/// # Does this crate produce minimal transducers?
235///
236/// For any non-trivial sized set of keys, it is unlikely that this crate will
237/// produce a minimal transducer. As far as this author knows, guaranteeing a
238/// minimal transducer requires working memory proportional to the number of
239/// states. This can be quite costly and is anathema to the main design goal of
240/// this crate: provide the ability to work with gigantic sets of strings with
241/// constant memory overhead.
242///
243/// Instead, construction of a finite state transducer uses a cache of
244/// states. More frequently used states are cached and reused, which provides
245/// reasonably good compression ratios. (No comprehensive benchmarks exist to
246/// back up this claim.)
247///
248/// It is possible that this crate may expose a way to guarantee minimal
249/// construction of transducers at the expense of exorbitant memory
250/// requirements.
251///
252/// # Bibliography
253///
254/// I initially got the idea to use finite state tranducers to represent
255/// ordered sets/maps from
256/// [Michael
257/// McCandless'](http://blog.mikemccandless.com/2010/12/using-finite-state-transducers-in.html)
258/// work on incorporating transducers in Lucene.
259///
260/// However, my work would also not have been possible without the hard work
261/// of many academics, especially
262/// [Jan Daciuk](http://galaxy.eti.pg.gda.pl/katedry/kiw/pracownicy/Jan.Daciuk/personal/).
263///
264/// * [Incremental construction of minimal acyclic finite-state automata](https://www.mitpressjournals.org/doi/pdfplus/10.1162/089120100561601)
265///   (Section 3 provides a decent overview of the algorithm used to construct
266///   transducers in this crate, assuming all outputs are `0`.)
267/// * [Direct Construction of Minimal Acyclic Subsequential Transducers](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.24.3698&rep=rep1&type=pdf)
268///   (The whole thing. The proof is dense but illuminating. The algorithm at
269///   the end is the money shot, namely, it incorporates output values.)
270/// * [Experiments with Automata Compression](https://www.researchgate.net/profile/Jiri-Dvorsky/publication/221568039_Word_Random_Access_Compression/links/0c96052c095630d5b3000000/Word-Random-Access-Compression.pdf#page=116), [Smaller Representation of Finite State Automata](https://www.cs.put.poznan.pl/dweiss/site/publications/download/fsacomp.pdf)
271///   (various compression techniques for representing states/transitions)
272/// * [Jan Daciuk's dissertation](http://www.pg.gda.pl/~jandac/thesis.ps.gz)
273///   (excellent for in depth overview)
274/// * [Comparison of Construction Algorithms for Minimal, Acyclic, Deterministic, Finite-State Automata from Sets of Strings](https://www.cs.mun.ca/~harold/Courses/Old/CS4750/Diary/q3p2qx4lv71m5vew.pdf)
275///   (excellent for surface level overview)
276#[derive(Clone)]
277pub struct Fst<D> {
278    meta: Meta,
279    data: D,
280}
281
282#[derive(Debug, Clone)]
283struct Meta {
284    version: u64,
285    root_addr: CompiledAddr,
286    ty: FstType,
287    len: usize,
288    /// A checksum is missing when the FST version is <= 2. (Checksums were
289    /// added in version 3.)
290    checksum: Option<u32>,
291}
292
293#[cfg(feature = "alloc")]
294impl Fst<Vec<u8>> {
295    /// Create a new FST from an iterator of lexicographically ordered byte
296    /// strings. Every key's value is set to `0`.
297    ///
298    /// If the iterator does not yield values in lexicographic order, then an
299    /// error is returned.
300    ///
301    /// Note that this is a convenience function to build an FST in memory.
302    /// To build an FST that streams to an arbitrary `io::Write`, use
303    /// `raw::Builder`.
304    #[cfg(feature = "std")]
305    pub fn from_iter_set<K, I>(iter: I) -> Result<Fst<Vec<u8>>>
306    where
307        K: AsRef<[u8]>,
308        I: IntoIterator<Item = K>,
309    {
310        let mut builder = Builder::memory();
311        for k in iter {
312            builder.add(k)?;
313        }
314        Ok(builder.into_fst())
315    }
316
317    /// Create a new FST from an iterator of lexicographically ordered byte
318    /// strings. The iterator should consist of tuples, where the first element
319    /// is the byte string and the second element is its corresponding value.
320    ///
321    /// If the iterator does not yield unique keys in lexicographic order, then
322    /// an error is returned.
323    ///
324    /// Note that this is a convenience function to build an FST in memory.
325    /// To build an FST that streams to an arbitrary `io::Write`, use
326    /// `raw::Builder`.
327    #[cfg(feature = "std")]
328    pub fn from_iter_map<K, I>(iter: I) -> Result<Fst<Vec<u8>>>
329    where
330        K: AsRef<[u8]>,
331        I: IntoIterator<Item = (K, u64)>,
332    {
333        let mut builder = Builder::memory();
334        for (k, v) in iter {
335            builder.insert(k, v)?;
336        }
337        Ok(builder.into_fst())
338    }
339}
340
341impl<D: AsRef<[u8]>> Fst<D> {
342    /// Creates a transducer from its representation as a raw byte sequence.
343    ///
344    /// This operation is intentionally very cheap (no allocations and no
345    /// copies). In particular, no verification on the integrity of the
346    /// FST is performed. Callers may opt into integrity checks via the
347    /// [`Fst::verify`](struct.Fst.html#method.verify) method.
348    ///
349    /// The fst must have been written with a compatible finite state
350    /// transducer builder (`Builder` qualifies). If the format is invalid or
351    /// if there is a mismatch between the API version of this library and the
352    /// fst, then an error is returned.
353    #[inline]
354    pub fn new(data: D) -> Result<Fst<D>> {
355        let bytes = data.as_ref();
356        if bytes.len() < 36 {
357            return Err(Error::Format { size: bytes.len() }.into());
358        }
359        // The read_u64 unwraps below are OK because they can never fail.
360        // They can only fail when there is an IO error or if there is an
361        // unexpected EOF. However, we are reading from a byte slice (no
362        // IO errors possible) and we've confirmed the byte slice is at least
363        // N bytes (no unexpected EOF).
364        let version = bytes::read_u64_le(bytes);
365        if version == 0 || version > VERSION {
366            return Err(
367                Error::Version { expected: VERSION, got: version }.into()
368            );
369        }
370        let ty = bytes::read_u64_le(&bytes[8..]);
371
372        let (end, checksum) = if version <= 2 {
373            (bytes.len(), None)
374        } else {
375            let checksum = bytes::read_u32_le(&bytes[bytes.len() - 4..]);
376            (bytes.len() - 4, Some(checksum))
377        };
378        let root_addr = {
379            let last = &bytes[end - 8..];
380            u64_to_usize(bytes::read_u64_le(last))
381        };
382        let len = {
383            let last2 = &bytes[end - 16..];
384            u64_to_usize(bytes::read_u64_le(last2))
385        };
386        // The root node is always the last node written, so its address should
387        // be near the end. After the root node is written, we still have to
388        // write the root *address* and the number of keys in the FST, along
389        // with the checksum. That's 20 bytes. The extra byte used below (21
390        // and not 20) comes from the fact that the root address points to
391        // the last byte in the root node, rather than the byte immediately
392        // following the root node.
393        //
394        // If this check passes, it is still possible that the FST is invalid
395        // but probably unlikely. If this check reports a false positive, then
396        // the program will probably panic. In the worst case, the FST will
397        // operate but be subtly wrong. (This would require the bytes to be in
398        // a format expected by an FST, which is incredibly unlikely.)
399        //
400        // The special check for EMPTY_ADDRESS is needed since an empty FST
401        // has a root node that is empty and final, which means it has the
402        // special address `0`. In that case, the FST is the smallest it can
403        // be: the version, type, root address and number of nodes. That's
404        // 36 bytes (8 byte u64 each).
405        //
406        // And finally, our calculation changes somewhat based on version.
407        // If the FST version is less than 3, then it does not have a checksum.
408        let (empty_total, addr_offset) =
409            if version <= 2 { (32, 17) } else { (36, 21) };
410        if (root_addr == EMPTY_ADDRESS && bytes.len() != empty_total)
411            && root_addr + addr_offset != bytes.len()
412        {
413            return Err(Error::Format { size: bytes.len() }.into());
414        }
415        let meta = Meta { version, root_addr, ty, len, checksum };
416        Ok(Fst { meta, data })
417    }
418
419    /// Retrieves the value associated with a key.
420    ///
421    /// If the key does not exist, then `None` is returned.
422    #[inline]
423    pub fn get<B: AsRef<[u8]>>(&self, key: B) -> Option<Output> {
424        self.as_ref().get(key.as_ref())
425    }
426
427    /// Returns true if and only if the given key is in this FST.
428    #[inline]
429    pub fn contains_key<B: AsRef<[u8]>>(&self, key: B) -> bool {
430        self.as_ref().contains_key(key.as_ref())
431    }
432
433    /// Retrieves the key associated with the given value.
434    ///
435    /// This is like `get_key_into`, but will return the key itself without
436    /// allowing the caller to reuse an allocation.
437    ///
438    /// If the given value does not exist, then `None` is returned.
439    ///
440    /// The values in this FST are not monotonically increasing when sorted
441    /// lexicographically by key, then this routine has unspecified behavior.
442    #[inline]
443    #[cfg(feature = "alloc")]
444    pub fn get_key(&self, value: u64) -> Option<Vec<u8>> {
445        let mut key = vec![];
446        if self.get_key_into(value, &mut key) {
447            Some(key)
448        } else {
449            None
450        }
451    }
452
453    /// Retrieves the key associated with the given value.
454    ///
455    /// If the given value does not exist, then `false` is returned. In this
456    /// case, the contents of `key` are unspecified.
457    ///
458    /// The given buffer is not clearer before the key is written to it.
459    ///
460    /// The values in this FST are not monotonically increasing when sorted
461    /// lexicographically by key, then this routine has unspecified behavior.
462    #[inline]
463    #[cfg(feature = "alloc")]
464    pub fn get_key_into(&self, value: u64, key: &mut Vec<u8>) -> bool {
465        self.as_ref().get_key_into(value, key)
466    }
467
468    /// Return a lexicographically ordered stream of all key-value pairs in
469    /// this fst.
470    #[inline]
471    #[cfg(feature = "alloc")]
472    pub fn stream(&self) -> Stream<'_> {
473        StreamBuilder::new(self.as_ref(), AlwaysMatch).into_stream()
474    }
475
476    /// Return a builder for range queries.
477    ///
478    /// A range query returns a subset of key-value pairs in this fst in a
479    /// range given in lexicographic order.
480    #[inline]
481    #[cfg(feature = "alloc")]
482    pub fn range(&self) -> StreamBuilder<'_> {
483        StreamBuilder::new(self.as_ref(), AlwaysMatch)
484    }
485
486    /// Executes an automaton on the keys of this FST.
487    #[inline]
488    #[cfg(feature = "alloc")]
489    pub fn search<A: Automaton>(&self, aut: A) -> StreamBuilder<'_, A> {
490        StreamBuilder::new(self.as_ref(), aut)
491    }
492
493    /// Executes an automaton on the keys of this FST and yields matching
494    /// keys along with the corresponding matching states in the given
495    /// automaton.
496    #[inline]
497    #[cfg(feature = "alloc")]
498    pub fn search_with_state<A: Automaton>(
499        &self,
500        aut: A,
501    ) -> StreamWithStateBuilder<'_, A> {
502        StreamWithStateBuilder::new(self.as_ref(), aut)
503    }
504
505    /// Returns the number of keys in this fst.
506    #[inline]
507    pub fn len(&self) -> usize {
508        self.as_ref().len()
509    }
510
511    /// Returns true if and only if this fst has no keys.
512    #[inline]
513    pub fn is_empty(&self) -> bool {
514        self.as_ref().is_empty()
515    }
516
517    /// Returns the number of bytes used by this fst.
518    #[inline]
519    pub fn size(&self) -> usize {
520        self.as_ref().size()
521    }
522
523    /// Attempts to verify this FST by computing its checksum.
524    ///
525    /// This will scan over all of the bytes in the underlying FST, so this
526    /// may be an expensive operation depending on the size of the FST.
527    ///
528    /// This returns an error in two cases:
529    ///
530    /// 1. When a checksum does not exist, which is the case for FSTs that were
531    ///    produced by the `fst` crate before version `0.4`.
532    /// 2. When the checksum in the FST does not match the computed checksum
533    ///    performed by this procedure.
534    #[inline]
535    pub fn verify(&self) -> Result<()> {
536        use crate::raw::crc32::CheckSummer;
537
538        let expected = match self.as_ref().meta.checksum {
539            None => return Err(Error::ChecksumMissing.into()),
540            Some(expected) => expected,
541        };
542        let mut summer = CheckSummer::new();
543        summer.update(&self.as_bytes()[..self.as_bytes().len() - 4]);
544        let got = summer.masked();
545        if expected == got {
546            return Ok(());
547        }
548        Err(Error::ChecksumMismatch { expected, got }.into())
549    }
550
551    /// Creates a new fst operation with this fst added to it.
552    ///
553    /// The `OpBuilder` type can be used to add additional fst streams
554    /// and perform set operations like union, intersection, difference and
555    /// symmetric difference on the keys of the fst. These set operations also
556    /// allow one to specify how conflicting values are merged in the stream.
557    #[inline]
558    #[cfg(feature = "alloc")]
559    pub fn op(&self) -> OpBuilder<'_> {
560        OpBuilder::new().add(self)
561    }
562
563    /// Returns true if and only if the `self` fst is disjoint with the fst
564    /// `stream`.
565    ///
566    /// `stream` must be a lexicographically ordered sequence of byte strings
567    /// with associated values.
568    #[inline]
569    #[cfg(feature = "alloc")]
570    pub fn is_disjoint<'f, I, S>(&self, stream: I) -> bool
571    where
572        I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
573        S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
574    {
575        self.op().add(stream).intersection().next().is_none()
576    }
577
578    /// Returns true if and only if the `self` fst is a subset of the fst
579    /// `stream`.
580    ///
581    /// `stream` must be a lexicographically ordered sequence of byte strings
582    /// with associated values.
583    #[inline]
584    #[cfg(feature = "alloc")]
585    pub fn is_subset<'f, I, S>(&self, stream: I) -> bool
586    where
587        I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
588        S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
589    {
590        let mut op = self.op().add(stream).intersection();
591        let mut count = 0;
592        while op.next().is_some() {
593            count += 1;
594        }
595        count == self.len()
596    }
597
598    /// Returns true if and only if the `self` fst is a superset of the fst
599    /// `stream`.
600    ///
601    /// `stream` must be a lexicographically ordered sequence of byte strings
602    /// with associated values.
603    #[inline]
604    #[cfg(feature = "alloc")]
605    pub fn is_superset<'f, I, S>(&self, stream: I) -> bool
606    where
607        I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
608        S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
609    {
610        let mut op = self.op().add(stream).union();
611        let mut count = 0;
612        while op.next().is_some() {
613            count += 1;
614        }
615        count == self.len()
616    }
617
618    /// Returns the underlying type of this fst.
619    ///
620    /// `FstType` is a convention used to indicate the type of the underlying
621    /// transducer.
622    ///
623    /// This crate reserves the range 0-255 (inclusive) but currently leaves
624    /// the meaning of 0-255 unspecified.
625    #[inline]
626    pub fn fst_type(&self) -> FstType {
627        self.as_ref().fst_type()
628    }
629
630    /// Returns the root node of this fst.
631    #[inline]
632    pub fn root(&self) -> Node<'_> {
633        self.as_ref().root()
634    }
635
636    /// Returns the node at the given address.
637    ///
638    /// Node addresses can be obtained by reading transitions on `Node` values.
639    #[inline]
640    pub fn node(&self, addr: CompiledAddr) -> Node<'_> {
641        self.as_ref().node(addr)
642    }
643
644    /// Returns a copy of the binary contents of this FST.
645    #[inline]
646    #[cfg(feature = "alloc")]
647    pub fn to_vec(&self) -> Vec<u8> {
648        self.as_ref().to_vec()
649    }
650
651    /// Returns the binary contents of this FST.
652    #[inline]
653    pub fn as_bytes(&self) -> &[u8] {
654        self.as_ref().as_bytes()
655    }
656
657    #[inline]
658    fn as_ref(&self) -> FstRef {
659        FstRef { meta: &self.meta, data: self.data.as_ref() }
660    }
661}
662
663impl<D> Fst<D> {
664    /// Returns the underlying data which constitutes the FST itself.
665    #[inline]
666    pub fn into_inner(self) -> D {
667        self.data
668    }
669
670    /// Returns a borrow to the underlying data which constitutes the FST itself.
671    #[inline]
672    pub fn as_inner(&self) -> &D {
673        &self.data
674    }
675
676    /// Maps the underlying data of the fst to another data type.
677    #[inline]
678    pub fn map_data<F, T>(self, mut f: F) -> Result<Fst<T>>
679    where
680        F: FnMut(D) -> T,
681        T: AsRef<[u8]>,
682    {
683        Fst::new(f(self.into_inner()))
684    }
685}
686
687#[cfg(feature = "alloc")]
688impl<'a, 'f, D: AsRef<[u8]>> IntoStreamer<'a> for &'f Fst<D> {
689    type Item = (&'a [u8], Output);
690    type Into = Stream<'f>;
691
692    #[inline]
693    #[cfg(feature = "alloc")]
694    fn into_stream(self) -> Stream<'f> {
695        StreamBuilder::new(self.as_ref(), AlwaysMatch).into_stream()
696    }
697}
698
699struct FstRef<'f> {
700    meta: &'f Meta,
701    data: &'f [u8],
702}
703
704impl<'f> FstRef<'f> {
705    #[inline]
706    fn get(&self, key: &[u8]) -> Option<Output> {
707        let mut node = self.root();
708        let mut out = Output::zero();
709        for &b in key {
710            node = match node.find_input(b) {
711                None => return None,
712                Some(i) => {
713                    let t = node.transition(i);
714                    out = out.cat(t.out);
715                    self.node(t.addr)
716                }
717            }
718        }
719        if !node.is_final() {
720            None
721        } else {
722            Some(out.cat(node.final_output()))
723        }
724    }
725
726    #[inline]
727    fn contains_key(&self, key: &[u8]) -> bool {
728        let mut node = self.root();
729        for &b in key {
730            node = match node.find_input(b) {
731                None => return false,
732                Some(i) => self.node(node.transition_addr(i)),
733            }
734        }
735        node.is_final()
736    }
737
738    #[inline]
739    #[cfg(feature = "alloc")]
740    fn get_key_into(&self, mut value: u64, key: &mut Vec<u8>) -> bool {
741        let mut node = self.root();
742        while value != 0 || !node.is_final() {
743            let trans = node
744                .transitions()
745                .take_while(|t| t.out.value() <= value)
746                .last();
747            node = match trans {
748                None => return false,
749                Some(t) => {
750                    value -= t.out.value();
751                    key.push(t.inp);
752                    self.node(t.addr)
753                }
754            };
755        }
756        true
757    }
758
759    #[inline]
760    fn len(&self) -> usize {
761        self.meta.len
762    }
763
764    #[inline]
765    fn is_empty(&self) -> bool {
766        self.meta.len == 0
767    }
768
769    #[inline]
770    fn size(&self) -> usize {
771        self.as_bytes().len()
772    }
773
774    #[inline]
775    fn fst_type(&self) -> FstType {
776        self.meta.ty
777    }
778
779    #[inline]
780    fn root_addr(&self) -> CompiledAddr {
781        self.meta.root_addr
782    }
783
784    #[inline]
785    fn root(&self) -> Node<'f> {
786        self.node(self.root_addr())
787    }
788
789    #[inline]
790    fn node(&self, addr: CompiledAddr) -> Node<'f> {
791        Node::new(self.meta.version, addr, self.as_bytes())
792    }
793
794    #[inline]
795    #[cfg(feature = "alloc")]
796    fn to_vec(&self) -> Vec<u8> {
797        self.as_bytes().to_vec()
798    }
799
800    #[inline]
801    fn as_bytes(&self) -> &'f [u8] {
802        self.data
803    }
804
805    #[inline]
806    #[cfg(feature = "alloc")]
807    fn empty_final_output(&self) -> Option<Output> {
808        let root = self.root();
809        if root.is_final() {
810            Some(root.final_output())
811        } else {
812            None
813        }
814    }
815}
816
817/// A builder for constructing range queries on streams.
818///
819/// Once all bounds are set, one should call `into_stream` to get a
820/// `Stream`.
821///
822/// Bounds are not additive. That is, if `ge` is called twice on the same
823/// builder, then the second setting wins.
824///
825/// The `A` type parameter corresponds to an optional automaton to filter
826/// the stream. By default, no filtering is done.
827///
828/// The `'f` lifetime parameter refers to the lifetime of the underlying fst.
829#[cfg(feature = "alloc")]
830pub struct StreamBuilder<'f, A = AlwaysMatch> {
831    fst: FstRef<'f>,
832    aut: A,
833    min: Bound,
834    max: Bound,
835}
836
837#[cfg(feature = "alloc")]
838impl<'f, A: Automaton> StreamBuilder<'f, A> {
839    fn new(fst: FstRef<'f>, aut: A) -> StreamBuilder<'f, A> {
840        StreamBuilder {
841            fst,
842            aut,
843            min: Bound::Unbounded,
844            max: Bound::Unbounded,
845        }
846    }
847
848    /// Specify a greater-than-or-equal-to bound.
849    #[cfg(feature = "alloc")]
850    pub fn ge<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
851        self.min = Bound::Included(bound.as_ref().to_owned());
852        self
853    }
854
855    /// Specify a greater-than bound.
856    #[cfg(feature = "alloc")]
857    pub fn gt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
858        self.min = Bound::Excluded(bound.as_ref().to_owned());
859        self
860    }
861
862    /// Specify a less-than-or-equal-to bound.
863    #[cfg(feature = "alloc")]
864    pub fn le<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
865        self.max = Bound::Included(bound.as_ref().to_owned());
866        self
867    }
868
869    /// Specify a less-than bound.
870    #[cfg(feature = "alloc")]
871    pub fn lt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
872        self.max = Bound::Excluded(bound.as_ref().to_owned());
873        self
874    }
875}
876
877#[cfg(feature = "alloc")]
878impl<'a, 'f, A: Automaton> IntoStreamer<'a> for StreamBuilder<'f, A> {
879    type Item = (&'a [u8], Output);
880    type Into = Stream<'f, A>;
881
882    fn into_stream(self) -> Stream<'f, A> {
883        Stream::new(self.fst, self.aut, self.min, self.max)
884    }
885}
886
887/// A builder for constructing range queries on streams that include automaton
888/// states.
889///
890/// In general, one should use `StreamBuilder` unless you have a specific need
891/// for accessing the states of the underlying automaton that is being used to
892/// filter this stream.
893///
894/// Once all bounds are set, one should call `into_stream` to get a
895/// `Stream`.
896///
897/// Bounds are not additive. That is, if `ge` is called twice on the same
898/// builder, then the second setting wins.
899///
900/// The `A` type parameter corresponds to an optional automaton to filter
901/// the stream. By default, no filtering is done.
902///
903/// The `'f` lifetime parameter refers to the lifetime of the underlying fst.
904#[cfg(feature = "alloc")]
905pub struct StreamWithStateBuilder<'f, A = AlwaysMatch> {
906    fst: FstRef<'f>,
907    aut: A,
908    min: Bound,
909    max: Bound,
910}
911
912#[cfg(feature = "alloc")]
913impl<'f, A: Automaton> StreamWithStateBuilder<'f, A> {
914    fn new(fst: FstRef<'f>, aut: A) -> StreamWithStateBuilder<'f, A> {
915        StreamWithStateBuilder {
916            fst,
917            aut,
918            min: Bound::Unbounded,
919            max: Bound::Unbounded,
920        }
921    }
922
923    /// Specify a greater-than-or-equal-to bound.
924    pub fn ge<T: AsRef<[u8]>>(
925        mut self,
926        bound: T,
927    ) -> StreamWithStateBuilder<'f, A> {
928        self.min = Bound::Included(bound.as_ref().to_owned());
929        self
930    }
931
932    /// Specify a greater-than bound.
933    pub fn gt<T: AsRef<[u8]>>(
934        mut self,
935        bound: T,
936    ) -> StreamWithStateBuilder<'f, A> {
937        self.min = Bound::Excluded(bound.as_ref().to_owned());
938        self
939    }
940
941    /// Specify a less-than-or-equal-to bound.
942    pub fn le<T: AsRef<[u8]>>(
943        mut self,
944        bound: T,
945    ) -> StreamWithStateBuilder<'f, A> {
946        self.max = Bound::Included(bound.as_ref().to_owned());
947        self
948    }
949
950    /// Specify a less-than bound.
951    pub fn lt<T: AsRef<[u8]>>(
952        mut self,
953        bound: T,
954    ) -> StreamWithStateBuilder<'f, A> {
955        self.max = Bound::Excluded(bound.as_ref().to_owned());
956        self
957    }
958}
959
960#[cfg(feature = "alloc")]
961impl<'a, 'f, A: 'a + Automaton> IntoStreamer<'a>
962    for StreamWithStateBuilder<'f, A>
963where
964    A::State: Clone,
965{
966    type Item = (&'a [u8], Output, A::State);
967    type Into = StreamWithState<'f, A>;
968
969    fn into_stream(self) -> StreamWithState<'f, A> {
970        StreamWithState::new(self.fst, self.aut, self.min, self.max)
971    }
972}
973
974#[derive(Debug)]
975#[cfg(feature = "alloc")]
976enum Bound {
977    Included(Vec<u8>),
978    Excluded(Vec<u8>),
979    Unbounded,
980}
981
982#[cfg(feature = "alloc")]
983impl Bound {
984    #[inline]
985    fn exceeded_by(&self, inp: &[u8]) -> bool {
986        match *self {
987            Bound::Included(ref v) => inp > v,
988            Bound::Excluded(ref v) => inp >= v,
989            Bound::Unbounded => false,
990        }
991    }
992
993    #[inline]
994    fn is_empty(&self) -> bool {
995        match *self {
996            Bound::Included(ref v) => v.is_empty(),
997            Bound::Excluded(ref v) => v.is_empty(),
998            Bound::Unbounded => true,
999        }
1000    }
1001
1002    #[inline]
1003    fn is_inclusive(&self) -> bool {
1004        matches!(*self, Bound::Included(_) | Bound::Unbounded)
1005    }
1006}
1007
1008/// A lexicographically ordered stream of key-value pairs from an fst.
1009///
1010/// The `A` type parameter corresponds to an optional automaton to filter
1011/// the stream. By default, no filtering is done.
1012///
1013/// The `'f` lifetime parameter refers to the lifetime of the underlying fst.
1014#[cfg(feature = "alloc")]
1015pub struct Stream<'f, A: Automaton = AlwaysMatch>(StreamWithState<'f, A>);
1016
1017#[cfg(feature = "alloc")]
1018impl<'f, A: Automaton> Stream<'f, A> {
1019    fn new(fst: FstRef<'f>, aut: A, min: Bound, max: Bound) -> Stream<'f, A> {
1020        Stream(StreamWithState::new(fst, aut, min, max))
1021    }
1022
1023    /// Convert this stream into a vector of byte strings and outputs.
1024    ///
1025    /// Note that this creates a new allocation for every key in the stream.
1026    pub fn into_byte_vec(mut self) -> Vec<(Vec<u8>, u64)> {
1027        let mut vs = vec![];
1028        while let Some((k, v)) = self.next() {
1029            vs.push((k.to_vec(), v.value()));
1030        }
1031        vs
1032    }
1033
1034    /// Convert this stream into a vector of Unicode strings and outputs.
1035    ///
1036    /// If any key is not valid UTF-8, then iteration on the stream is stopped
1037    /// and a UTF-8 decoding error is returned.
1038    ///
1039    /// Note that this creates a new allocation for every key in the stream.
1040    pub fn into_str_vec(mut self) -> Result<Vec<(String, u64)>> {
1041        let mut vs = vec![];
1042        while let Some((k, v)) = self.next() {
1043            let k = String::from_utf8(k.to_vec()).map_err(Error::from)?;
1044            vs.push((k, v.value()));
1045        }
1046        Ok(vs)
1047    }
1048
1049    /// Convert this stream into a vector of byte strings.
1050    ///
1051    /// Note that this creates a new allocation for every key in the stream.
1052    pub fn into_byte_keys(mut self) -> Vec<Vec<u8>> {
1053        let mut vs = vec![];
1054        while let Some((k, _)) = self.next() {
1055            vs.push(k.to_vec());
1056        }
1057        vs
1058    }
1059
1060    /// Convert this stream into a vector of Unicode strings.
1061    ///
1062    /// If any key is not valid UTF-8, then iteration on the stream is stopped
1063    /// and a UTF-8 decoding error is returned.
1064    ///
1065    /// Note that this creates a new allocation for every key in the stream.
1066    pub fn into_str_keys(mut self) -> Result<Vec<String>> {
1067        let mut vs = vec![];
1068        while let Some((k, _)) = self.next() {
1069            let k = String::from_utf8(k.to_vec()).map_err(Error::from)?;
1070            vs.push(k);
1071        }
1072        Ok(vs)
1073    }
1074
1075    /// Convert this stream into a vector of outputs.
1076    pub fn into_values(mut self) -> Vec<u64> {
1077        let mut vs = vec![];
1078        while let Some((_, v)) = self.next() {
1079            vs.push(v.value());
1080        }
1081        vs
1082    }
1083}
1084
1085#[cfg(feature = "alloc")]
1086impl<'f, 'a, A: Automaton> Streamer<'a> for Stream<'f, A> {
1087    type Item = (&'a [u8], Output);
1088
1089    fn next(&'a mut self) -> Option<(&'a [u8], Output)> {
1090        self.0.next_with(|_| ()).map(|(key, out, ())| (key, out))
1091    }
1092}
1093
1094/// A lexicographically ordered stream of key-value-state triples from an fst
1095/// and an automaton.
1096///
1097/// The key-values are from the underyling FSTP while the states are from the
1098/// automaton.
1099///
1100/// The `A` type parameter corresponds to an optional automaton to filter
1101/// the stream. By default, no filtering is done.
1102///
1103/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
1104#[cfg(feature = "alloc")]
1105pub struct StreamWithState<'f, A = AlwaysMatch>
1106where
1107    A: Automaton,
1108{
1109    fst: FstRef<'f>,
1110    aut: A,
1111    inp: Vec<u8>,
1112    empty_output: Option<Output>,
1113    stack: Vec<StreamState<'f, A::State>>,
1114    end_at: Bound,
1115}
1116
1117#[derive(Clone, Debug)]
1118#[cfg(feature = "alloc")]
1119struct StreamState<'f, S> {
1120    node: Node<'f>,
1121    trans: usize,
1122    out: Output,
1123    aut_state: S,
1124}
1125
1126#[cfg(feature = "alloc")]
1127impl<'f, A: Automaton> StreamWithState<'f, A> {
1128    fn new(
1129        fst: FstRef<'f>,
1130        aut: A,
1131        min: Bound,
1132        max: Bound,
1133    ) -> StreamWithState<'f, A> {
1134        let mut rdr = StreamWithState {
1135            fst,
1136            aut,
1137            inp: Vec::with_capacity(16),
1138            empty_output: None,
1139            stack: vec![],
1140            end_at: max,
1141        };
1142        rdr.seek_min(min);
1143        rdr
1144    }
1145
1146    /// Seeks the underlying stream such that the next key to be read is the
1147    /// smallest key in the underlying fst that satisfies the given minimum
1148    /// bound.
1149    ///
1150    /// This theoretically should be straight-forward, but we need to make
1151    /// sure our stack is correct, which includes accounting for automaton
1152    /// states.
1153    fn seek_min(&mut self, min: Bound) {
1154        if min.is_empty() {
1155            if min.is_inclusive() {
1156                self.empty_output = self.fst.empty_final_output();
1157            }
1158            self.stack = vec![StreamState {
1159                node: self.fst.root(),
1160                trans: 0,
1161                out: Output::zero(),
1162                aut_state: self.aut.start(),
1163            }];
1164            return;
1165        }
1166        let (key, inclusive) = match min {
1167            Bound::Excluded(ref min) => (min, false),
1168            Bound::Included(ref min) => (min, true),
1169            Bound::Unbounded => unreachable!(),
1170        };
1171        // At this point, we need to find the starting location of `min` in
1172        // the FST. However, as we search, we need to maintain a stack of
1173        // reader states so that the reader can pick up where we left off.
1174        // N.B. We do not necessarily need to stop in a final state, unlike
1175        // the one-off `find` method. For the example, the given bound might
1176        // not actually exist in the FST.
1177        let mut node = self.fst.root();
1178        let mut out = Output::zero();
1179        let mut aut_state = self.aut.start();
1180        for &b in key {
1181            match node.find_input(b) {
1182                Some(i) => {
1183                    let t = node.transition(i);
1184                    let prev_state = aut_state;
1185                    aut_state = self.aut.accept(&prev_state, b);
1186                    self.inp.push(b);
1187                    self.stack.push(StreamState {
1188                        node,
1189                        trans: i + 1,
1190                        out,
1191                        aut_state: prev_state,
1192                    });
1193                    out = out.cat(t.out);
1194                    node = self.fst.node(t.addr);
1195                }
1196                None => {
1197                    // This is a little tricky. We're in this case if the
1198                    // given bound is not a prefix of any key in the FST.
1199                    // Since this is a minimum bound, we need to find the
1200                    // first transition in this node that proceeds the current
1201                    // input byte.
1202                    self.stack.push(StreamState {
1203                        node,
1204                        trans: node
1205                            .transitions()
1206                            .position(|t| t.inp > b)
1207                            .unwrap_or(node.len()),
1208                        out,
1209                        aut_state,
1210                    });
1211                    return;
1212                }
1213            }
1214        }
1215        if !self.stack.is_empty() {
1216            let last = self.stack.len() - 1;
1217            if inclusive {
1218                self.stack[last].trans -= 1;
1219                self.inp.pop();
1220            } else {
1221                let node = self.stack[last].node;
1222                let trans = self.stack[last].trans;
1223                self.stack.push(StreamState {
1224                    node: self.fst.node(node.transition(trans - 1).addr),
1225                    trans: 0,
1226                    out,
1227                    aut_state,
1228                });
1229            }
1230        }
1231    }
1232
1233    fn next_with<T>(
1234        &mut self,
1235        mut map: impl FnMut(&A::State) -> T,
1236    ) -> Option<(&[u8], Output, T)> {
1237        if let Some(out) = self.empty_output.take() {
1238            if self.end_at.exceeded_by(&[]) {
1239                self.stack.clear();
1240                return None;
1241            }
1242
1243            let start = self.aut.start();
1244            if self.aut.is_match(&start) {
1245                return Some((&[], out, map(&start)));
1246            }
1247        }
1248        while let Some(state) = self.stack.pop() {
1249            if state.trans >= state.node.len()
1250                || !self.aut.can_match(&state.aut_state)
1251            {
1252                if state.node.addr() != self.fst.root_addr() {
1253                    self.inp.pop().unwrap();
1254                }
1255                continue;
1256            }
1257            let trans = state.node.transition(state.trans);
1258            let out = state.out.cat(trans.out);
1259            let next_state = self.aut.accept(&state.aut_state, trans.inp);
1260            let t = map(&next_state);
1261            let mut is_match = self.aut.is_match(&next_state);
1262            let next_node = self.fst.node(trans.addr);
1263            self.inp.push(trans.inp);
1264            if next_node.is_final() {
1265                if let Some(eof_state) = self.aut.accept_eof(&next_state) {
1266                    is_match = self.aut.is_match(&eof_state);
1267                }
1268            }
1269            self.stack.push(StreamState { trans: state.trans + 1, ..state });
1270            self.stack.push(StreamState {
1271                node: next_node,
1272                trans: 0,
1273                out,
1274                aut_state: next_state,
1275            });
1276            if self.end_at.exceeded_by(&self.inp) {
1277                // We are done, forever.
1278                self.stack.clear();
1279                return None;
1280            }
1281            if next_node.is_final() && is_match {
1282                return Some((
1283                    &self.inp,
1284                    out.cat(next_node.final_output()),
1285                    t,
1286                ));
1287            }
1288        }
1289        None
1290    }
1291}
1292
1293#[cfg(feature = "alloc")]
1294impl<'a, 'f, A: 'a + Automaton> Streamer<'a> for StreamWithState<'f, A>
1295where
1296    A::State: Clone,
1297{
1298    type Item = (&'a [u8], Output, A::State);
1299
1300    fn next(&'a mut self) -> Option<(&'a [u8], Output, A::State)> {
1301        self.next_with(Clone::clone)
1302    }
1303}
1304
1305/// An output is a value that is associated with a key in a finite state
1306/// transducer.
1307///
1308/// Note that outputs must satisfy an algebra. Namely, it must have an additive
1309/// identity and the following binary operations defined: `prefix`,
1310/// `concatenation` and `subtraction`. `prefix` and `concatenation` are
1311/// commutative while `subtraction` is not. `subtraction` is only defined on
1312/// pairs of operands where the first operand is greater than or equal to the
1313/// second operand.
1314///
1315/// Currently, output values must be `u64`. However, in theory, an output value
1316/// can be anything that satisfies the above algebra. Future versions of this
1317/// crate may make outputs generic on this algebra.
1318#[derive(Copy, Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
1319pub struct Output(u64);
1320
1321impl Output {
1322    /// Create a new output from a `u64`.
1323    #[inline]
1324    #[must_use]
1325    pub fn new(v: u64) -> Output {
1326        Output(v)
1327    }
1328
1329    /// Create a zero output.
1330    #[inline]
1331    #[must_use]
1332    pub fn zero() -> Output {
1333        Output(0)
1334    }
1335
1336    /// Retrieve the value inside this output.
1337    #[inline]
1338    #[must_use]
1339    pub fn value(self) -> u64 {
1340        self.0
1341    }
1342
1343    /// Returns true if this is a zero output.
1344    #[inline]
1345    #[must_use]
1346    pub fn is_zero(self) -> bool {
1347        self.0 == 0
1348    }
1349
1350    /// Returns the prefix of this output and `o`.
1351    #[inline]
1352    #[must_use]
1353    pub fn prefix(self, o: Output) -> Output {
1354        Output(cmp::min(self.0, o.0))
1355    }
1356
1357    /// Returns the concatenation of this output and `o`.
1358    #[inline]
1359    #[must_use]
1360    pub fn cat(self, o: Output) -> Output {
1361        Output(self.0 + o.0)
1362    }
1363}
1364
1365impl core::ops::Sub for Output {
1366    type Output = Output;
1367
1368    #[inline]
1369    fn sub(self, rhs: Self) -> Self::Output {
1370        Self(
1371            self.0
1372                .checked_sub(rhs.0)
1373                .expect("BUG: underflow subtraction not allowed"),
1374        )
1375    }
1376}
1377
1378/// A transition from one note to another.
1379#[derive(Copy, Clone, Hash, Eq, PartialEq)]
1380pub struct Transition {
1381    /// The byte input associated with this transition.
1382    pub inp: u8,
1383    /// The output associated with this transition.
1384    pub out: Output,
1385    /// The address of the node that this transition points to.
1386    pub addr: CompiledAddr,
1387}
1388
1389impl Default for Transition {
1390    #[inline]
1391    fn default() -> Transition {
1392        Transition { inp: 0, out: Output::zero(), addr: NONE_ADDRESS }
1393    }
1394}
1395
1396impl fmt::Debug for Transition {
1397    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1398        if self.out.is_zero() {
1399            write!(f, "{} -> {}", self.inp as char, self.addr)
1400        } else {
1401            write!(
1402                f,
1403                "({}, {}) -> {}",
1404                self.inp as char,
1405                self.out.value(),
1406                self.addr
1407            )
1408        }
1409    }
1410}
1411
1412#[inline]
1413#[cfg(target_pointer_width = "64")]
1414#[allow(clippy::cast_possible_truncation)]
1415fn u64_to_usize(n: u64) -> usize {
1416    n as usize
1417}
1418
1419#[inline]
1420#[cfg(not(target_pointer_width = "64"))]
1421fn u64_to_usize(n: u64) -> usize {
1422    if n > core::usize::MAX as u64 {
1423        panic!(
1424            "\
1425Cannot convert node address {} to a pointer sized variable. If this FST
1426is very large and was generated on a system with a larger pointer size
1427than this system, then it is not possible to read this FST on this
1428system.",
1429            n
1430        );
1431    }
1432    n as usize
1433}