fst_no_std/raw/mod.rs
1/*!
2Operations on raw finite state transducers.
3
4This sub-module exposes the guts of a finite state transducer. Many parts of
5it, such as construction and traversal, are mirrored in the `set` and `map`
6sub-modules. Other parts of it, such as direct access to nodes and transitions
7in the transducer, do not have any analog.
8
9# Overview of types
10
11`Fst` is a read only interface to pre-constructed finite state transducers.
12`Node` is a read only interface to a single node in a transducer. `Builder` is
13used to create new finite state transducers. (Once a transducer is created, it
14can never be modified.) `Stream` is a stream of all inputs and outputs in a
15transducer. `StreamBuilder` builds range queries. `OpBuilder` collects streams
16and executes set operations like `union` or `intersection` on them with the
17option of specifying a merge strategy for output values.
18
19Most of the rest of the types are streams from set operations.
20*/
21use core::cmp;
22use core::fmt;
23
24#[cfg(feature = "alloc")]
25use crate::automaton::{AlwaysMatch, Automaton};
26use crate::bytes;
27use crate::error::Result;
28#[cfg(feature = "alloc")]
29use crate::stream::{IntoStreamer, Streamer};
30
31#[cfg(feature = "std")]
32pub use crate::raw::build::Builder;
33pub use crate::raw::error::Error;
34pub use crate::raw::node::{Node, Transitions};
35pub use crate::raw::ops::IndexedValue;
36#[cfg(feature = "alloc")]
37pub use crate::raw::ops::{
38 Difference, Intersection, OpBuilder, SymmetricDifference, Union,
39};
40#[cfg(feature = "alloc")]
41use alloc::{borrow::ToOwned, string::String, vec, vec::Vec};
42
43#[cfg(feature = "std")]
44mod build;
45mod common_inputs;
46#[cfg(feature = "std")]
47mod counting_writer;
48mod crc32;
49mod crc32_table;
50mod error;
51mod node;
52mod ops;
53mod registry;
54mod registry_minimal;
55#[cfg(test)]
56mod tests;
57
58/// The API version of this crate.
59///
60/// This version number is written to every finite state transducer created by
61/// this crate. When a finite state transducer is read, its version number is
62/// checked against this value.
63///
64/// Currently, any version mismatch results in an error. Fixing this requires
65/// regenerating the finite state transducer or switching to a version of this
66/// crate that is compatible with the serialized transducer. This particular
67/// behavior may be relaxed in future versions.
68pub const VERSION: u64 = 3;
69
70/// A sentinel value used to indicate an empty final state.
71const EMPTY_ADDRESS: CompiledAddr = 0;
72
73/// A sentinel value used to indicate an invalid state.
74///
75/// This is never the address of a node in a serialized transducer.
76const NONE_ADDRESS: CompiledAddr = 1;
77
78/// `FstType` is a convention used to indicate the type of the underlying
79/// transducer.
80///
81/// This crate reserves the range 0-255 (inclusive) but currently leaves the
82/// meaning of 0-255 unspecified.
83pub type FstType = u64;
84
85/// `CompiledAddr` is the type used to address nodes in a finite state
86/// transducer.
87///
88/// It is most useful as a pointer to nodes. It can be used in the `Fst::node`
89/// method to resolve the pointer.
90pub type CompiledAddr = usize;
91
92/// An acyclic deterministic finite state transducer.
93///
94/// # How does it work?
95///
96/// The short answer: it's just like a prefix trie, which compresses keys
97/// based only on their prefixes, except that a automaton/transducer also
98/// compresses suffixes.
99///
100/// The longer answer is that keys in an automaton are stored only in the
101/// transitions from one state to another. A key can be acquired by tracing
102/// a path from the root of the automaton to any match state. The inputs along
103/// each transition are concatenated. Once a match state is reached, the
104/// concatenation of inputs up until that point corresponds to a single key.
105///
106/// But why is it called a transducer instead of an automaton? A finite state
107/// transducer is just like a finite state automaton, except that it has output
108/// transitions in addition to input transitions. Namely, the value associated
109/// with any particular key is determined by summing the outputs along every
110/// input transition that leads to the key's corresponding match state.
111///
112/// This is best demonstrated with a couple images. First, let's ignore the
113/// "transducer" aspect and focus on a plain automaton.
114///
115/// Consider that your keys are abbreviations of some of the months in the
116/// Gregorian calendar:
117///
118/// ```plain
119/// jan
120/// feb
121/// mar
122/// may
123/// jun
124/// jul
125/// ```
126///
127/// The corresponding automaton that stores all of these as keys looks like
128/// this:
129///
130/// 
131///
132/// Notice here how the prefix and suffix of `jan` and `jun` are shared.
133/// Similarly, the prefixes of `jun` and `jul` are shared and the prefixes
134/// of `mar` and `may` are shared.
135///
136/// All of the keys from this automaton can be enumerated in lexicographic
137/// order by following every transition from each node in lexicographic
138/// order. Since it is acyclic, the procedure will terminate.
139///
140/// A key can be found by tracing it through the transitions in the automaton.
141/// For example, the key `aug` is known not to be in the automaton by only
142/// visiting the root state (because there is no `a` transition). For another
143/// example, the key `jax` is known not to be in the set only after moving
144/// through the transitions for `j` and `a`. Namely, after those transitions
145/// are followed, there are no transitions for `x`.
146///
147/// Notice here that looking up a key is proportional the length of the key
148/// itself. Namely, lookup time is not affected by the number of keys in the
149/// automaton!
150///
151/// Additionally, notice that the automaton exploits the fact that many keys
152/// share common prefixes and suffixes. For example, `jun` and `jul` are
153/// represented with no more states than would be required to represent either
154/// one on its own. Instead, the only change is a single extra transition. This
155/// is a form of compression and is key to how the automatons produced by this
156/// crate are so small.
157///
158/// Let's move on to finite state transducers. Consider the same set of keys
159/// as above, but let's assign their numeric month values:
160///
161/// ```plain
162/// jan,1
163/// feb,2
164/// mar,3
165/// may,5
166/// jun,6
167/// jul,7
168/// ```
169///
170/// The corresponding transducer looks very similar to the automaton above,
171/// except outputs have been added to some of the transitions:
172///
173/// 
174///
175/// All of the operations with a transducer are the same as described above
176/// for automatons. Additionally, the same compression techniques are used:
177/// common prefixes and suffixes in keys are exploited.
178///
179/// The key difference is that some transitions have been given an output.
180/// As one follows input transitions, one must sum the outputs as they
181/// are seen. (A transition with no output represents the additive identity,
182/// or `0` in this case.) For example, when looking up `feb`, the transition
183/// `f` has output `2`, the transition `e` has output `0`, and the transition
184/// `b` also has output `0`. The sum of these is `2`, which is exactly the
185/// value we associated with `feb`.
186///
187/// For another more interesting example, consider `jul`. The `j` transition
188/// has output `1`, the `u` transition has output `5` and the `l` transition
189/// has output `1`. Summing these together gets us `7`, which is again the
190/// correct value associated with `jul`. Notice that if we instead looked up
191/// the `jun` key, then the `n` transition would be followed instead of the
192/// `l` transition, which has no output. Therefore, the `jun` key equals
193/// `1+5+0=6`.
194///
195/// The trick to transducers is that there exists a unique path through the
196/// transducer for every key, and its outputs are stored appropriately along
197/// this path such that the correct value is returned when they are all summed
198/// together. This process also enables the data that makes up each value to be
199/// shared across many values in the transducer in exactly the same way that
200/// keys are shared. This is yet another form of compression!
201///
202/// # Bonus: a billion strings
203///
204/// The amount of compression one can get from automata can be absolutely
205/// ridiuclous. Consider the particular case of storing all billion strings
206/// in the range `0000000001-1000000000`, e.g.,
207///
208/// ```plain
209/// 0000000001
210/// 0000000002
211/// ...
212/// 0000000100
213/// 0000000101
214/// ...
215/// 0999999999
216/// 1000000000
217/// ```
218///
219/// The corresponding automaton looks like this:
220///
221/// 
222///
223/// Indeed, the on disk size of this automaton is a mere **251 bytes**.
224///
225/// Of course, this is a bit of a pathological best case, but it does serve
226/// to show how good compression can be in the optimal case.
227///
228/// Also, check out the
229/// [corresponding transducer](https://burntsushi.net/stuff/one-billion-map.svg)
230/// that maps each string to its integer value. It's a bit bigger, but still
231/// only takes up **896 bytes** of space on disk. This demonstrates that
232/// output values are also compressible.
233///
234/// # Does this crate produce minimal transducers?
235///
236/// For any non-trivial sized set of keys, it is unlikely that this crate will
237/// produce a minimal transducer. As far as this author knows, guaranteeing a
238/// minimal transducer requires working memory proportional to the number of
239/// states. This can be quite costly and is anathema to the main design goal of
240/// this crate: provide the ability to work with gigantic sets of strings with
241/// constant memory overhead.
242///
243/// Instead, construction of a finite state transducer uses a cache of
244/// states. More frequently used states are cached and reused, which provides
245/// reasonably good compression ratios. (No comprehensive benchmarks exist to
246/// back up this claim.)
247///
248/// It is possible that this crate may expose a way to guarantee minimal
249/// construction of transducers at the expense of exorbitant memory
250/// requirements.
251///
252/// # Bibliography
253///
254/// I initially got the idea to use finite state tranducers to represent
255/// ordered sets/maps from
256/// [Michael
257/// McCandless'](http://blog.mikemccandless.com/2010/12/using-finite-state-transducers-in.html)
258/// work on incorporating transducers in Lucene.
259///
260/// However, my work would also not have been possible without the hard work
261/// of many academics, especially
262/// [Jan Daciuk](http://galaxy.eti.pg.gda.pl/katedry/kiw/pracownicy/Jan.Daciuk/personal/).
263///
264/// * [Incremental construction of minimal acyclic finite-state automata](https://www.mitpressjournals.org/doi/pdfplus/10.1162/089120100561601)
265/// (Section 3 provides a decent overview of the algorithm used to construct
266/// transducers in this crate, assuming all outputs are `0`.)
267/// * [Direct Construction of Minimal Acyclic Subsequential Transducers](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.24.3698&rep=rep1&type=pdf)
268/// (The whole thing. The proof is dense but illuminating. The algorithm at
269/// the end is the money shot, namely, it incorporates output values.)
270/// * [Experiments with Automata Compression](https://www.researchgate.net/profile/Jiri-Dvorsky/publication/221568039_Word_Random_Access_Compression/links/0c96052c095630d5b3000000/Word-Random-Access-Compression.pdf#page=116), [Smaller Representation of Finite State Automata](https://www.cs.put.poznan.pl/dweiss/site/publications/download/fsacomp.pdf)
271/// (various compression techniques for representing states/transitions)
272/// * [Jan Daciuk's dissertation](http://www.pg.gda.pl/~jandac/thesis.ps.gz)
273/// (excellent for in depth overview)
274/// * [Comparison of Construction Algorithms for Minimal, Acyclic, Deterministic, Finite-State Automata from Sets of Strings](https://www.cs.mun.ca/~harold/Courses/Old/CS4750/Diary/q3p2qx4lv71m5vew.pdf)
275/// (excellent for surface level overview)
276#[derive(Clone)]
277pub struct Fst<D> {
278 meta: Meta,
279 data: D,
280}
281
282#[derive(Debug, Clone)]
283struct Meta {
284 version: u64,
285 root_addr: CompiledAddr,
286 ty: FstType,
287 len: usize,
288 /// A checksum is missing when the FST version is <= 2. (Checksums were
289 /// added in version 3.)
290 checksum: Option<u32>,
291}
292
293#[cfg(feature = "alloc")]
294impl Fst<Vec<u8>> {
295 /// Create a new FST from an iterator of lexicographically ordered byte
296 /// strings. Every key's value is set to `0`.
297 ///
298 /// If the iterator does not yield values in lexicographic order, then an
299 /// error is returned.
300 ///
301 /// Note that this is a convenience function to build an FST in memory.
302 /// To build an FST that streams to an arbitrary `io::Write`, use
303 /// `raw::Builder`.
304 #[cfg(feature = "std")]
305 pub fn from_iter_set<K, I>(iter: I) -> Result<Fst<Vec<u8>>>
306 where
307 K: AsRef<[u8]>,
308 I: IntoIterator<Item = K>,
309 {
310 let mut builder = Builder::memory();
311 for k in iter {
312 builder.add(k)?;
313 }
314 Ok(builder.into_fst())
315 }
316
317 /// Create a new FST from an iterator of lexicographically ordered byte
318 /// strings. The iterator should consist of tuples, where the first element
319 /// is the byte string and the second element is its corresponding value.
320 ///
321 /// If the iterator does not yield unique keys in lexicographic order, then
322 /// an error is returned.
323 ///
324 /// Note that this is a convenience function to build an FST in memory.
325 /// To build an FST that streams to an arbitrary `io::Write`, use
326 /// `raw::Builder`.
327 #[cfg(feature = "std")]
328 pub fn from_iter_map<K, I>(iter: I) -> Result<Fst<Vec<u8>>>
329 where
330 K: AsRef<[u8]>,
331 I: IntoIterator<Item = (K, u64)>,
332 {
333 let mut builder = Builder::memory();
334 for (k, v) in iter {
335 builder.insert(k, v)?;
336 }
337 Ok(builder.into_fst())
338 }
339}
340
341impl<D: AsRef<[u8]>> Fst<D> {
342 /// Creates a transducer from its representation as a raw byte sequence.
343 ///
344 /// This operation is intentionally very cheap (no allocations and no
345 /// copies). In particular, no verification on the integrity of the
346 /// FST is performed. Callers may opt into integrity checks via the
347 /// [`Fst::verify`](struct.Fst.html#method.verify) method.
348 ///
349 /// The fst must have been written with a compatible finite state
350 /// transducer builder (`Builder` qualifies). If the format is invalid or
351 /// if there is a mismatch between the API version of this library and the
352 /// fst, then an error is returned.
353 #[inline]
354 pub fn new(data: D) -> Result<Fst<D>> {
355 let bytes = data.as_ref();
356 if bytes.len() < 36 {
357 return Err(Error::Format { size: bytes.len() }.into());
358 }
359 // The read_u64 unwraps below are OK because they can never fail.
360 // They can only fail when there is an IO error or if there is an
361 // unexpected EOF. However, we are reading from a byte slice (no
362 // IO errors possible) and we've confirmed the byte slice is at least
363 // N bytes (no unexpected EOF).
364 let version = bytes::read_u64_le(bytes);
365 if version == 0 || version > VERSION {
366 return Err(
367 Error::Version { expected: VERSION, got: version }.into()
368 );
369 }
370 let ty = bytes::read_u64_le(&bytes[8..]);
371
372 let (end, checksum) = if version <= 2 {
373 (bytes.len(), None)
374 } else {
375 let checksum = bytes::read_u32_le(&bytes[bytes.len() - 4..]);
376 (bytes.len() - 4, Some(checksum))
377 };
378 let root_addr = {
379 let last = &bytes[end - 8..];
380 u64_to_usize(bytes::read_u64_le(last))
381 };
382 let len = {
383 let last2 = &bytes[end - 16..];
384 u64_to_usize(bytes::read_u64_le(last2))
385 };
386 // The root node is always the last node written, so its address should
387 // be near the end. After the root node is written, we still have to
388 // write the root *address* and the number of keys in the FST, along
389 // with the checksum. That's 20 bytes. The extra byte used below (21
390 // and not 20) comes from the fact that the root address points to
391 // the last byte in the root node, rather than the byte immediately
392 // following the root node.
393 //
394 // If this check passes, it is still possible that the FST is invalid
395 // but probably unlikely. If this check reports a false positive, then
396 // the program will probably panic. In the worst case, the FST will
397 // operate but be subtly wrong. (This would require the bytes to be in
398 // a format expected by an FST, which is incredibly unlikely.)
399 //
400 // The special check for EMPTY_ADDRESS is needed since an empty FST
401 // has a root node that is empty and final, which means it has the
402 // special address `0`. In that case, the FST is the smallest it can
403 // be: the version, type, root address and number of nodes. That's
404 // 36 bytes (8 byte u64 each).
405 //
406 // And finally, our calculation changes somewhat based on version.
407 // If the FST version is less than 3, then it does not have a checksum.
408 let (empty_total, addr_offset) =
409 if version <= 2 { (32, 17) } else { (36, 21) };
410 if (root_addr == EMPTY_ADDRESS && bytes.len() != empty_total)
411 && root_addr + addr_offset != bytes.len()
412 {
413 return Err(Error::Format { size: bytes.len() }.into());
414 }
415 let meta = Meta { version, root_addr, ty, len, checksum };
416 Ok(Fst { meta, data })
417 }
418
419 /// Retrieves the value associated with a key.
420 ///
421 /// If the key does not exist, then `None` is returned.
422 #[inline]
423 pub fn get<B: AsRef<[u8]>>(&self, key: B) -> Option<Output> {
424 self.as_ref().get(key.as_ref())
425 }
426
427 /// Returns true if and only if the given key is in this FST.
428 #[inline]
429 pub fn contains_key<B: AsRef<[u8]>>(&self, key: B) -> bool {
430 self.as_ref().contains_key(key.as_ref())
431 }
432
433 /// Retrieves the key associated with the given value.
434 ///
435 /// This is like `get_key_into`, but will return the key itself without
436 /// allowing the caller to reuse an allocation.
437 ///
438 /// If the given value does not exist, then `None` is returned.
439 ///
440 /// The values in this FST are not monotonically increasing when sorted
441 /// lexicographically by key, then this routine has unspecified behavior.
442 #[inline]
443 #[cfg(feature = "alloc")]
444 pub fn get_key(&self, value: u64) -> Option<Vec<u8>> {
445 let mut key = vec![];
446 if self.get_key_into(value, &mut key) {
447 Some(key)
448 } else {
449 None
450 }
451 }
452
453 /// Retrieves the key associated with the given value.
454 ///
455 /// If the given value does not exist, then `false` is returned. In this
456 /// case, the contents of `key` are unspecified.
457 ///
458 /// The given buffer is not clearer before the key is written to it.
459 ///
460 /// The values in this FST are not monotonically increasing when sorted
461 /// lexicographically by key, then this routine has unspecified behavior.
462 #[inline]
463 #[cfg(feature = "alloc")]
464 pub fn get_key_into(&self, value: u64, key: &mut Vec<u8>) -> bool {
465 self.as_ref().get_key_into(value, key)
466 }
467
468 /// Return a lexicographically ordered stream of all key-value pairs in
469 /// this fst.
470 #[inline]
471 #[cfg(feature = "alloc")]
472 pub fn stream(&self) -> Stream<'_> {
473 StreamBuilder::new(self.as_ref(), AlwaysMatch).into_stream()
474 }
475
476 /// Return a builder for range queries.
477 ///
478 /// A range query returns a subset of key-value pairs in this fst in a
479 /// range given in lexicographic order.
480 #[inline]
481 #[cfg(feature = "alloc")]
482 pub fn range(&self) -> StreamBuilder<'_> {
483 StreamBuilder::new(self.as_ref(), AlwaysMatch)
484 }
485
486 /// Executes an automaton on the keys of this FST.
487 #[inline]
488 #[cfg(feature = "alloc")]
489 pub fn search<A: Automaton>(&self, aut: A) -> StreamBuilder<'_, A> {
490 StreamBuilder::new(self.as_ref(), aut)
491 }
492
493 /// Executes an automaton on the keys of this FST and yields matching
494 /// keys along with the corresponding matching states in the given
495 /// automaton.
496 #[inline]
497 #[cfg(feature = "alloc")]
498 pub fn search_with_state<A: Automaton>(
499 &self,
500 aut: A,
501 ) -> StreamWithStateBuilder<'_, A> {
502 StreamWithStateBuilder::new(self.as_ref(), aut)
503 }
504
505 /// Returns the number of keys in this fst.
506 #[inline]
507 pub fn len(&self) -> usize {
508 self.as_ref().len()
509 }
510
511 /// Returns true if and only if this fst has no keys.
512 #[inline]
513 pub fn is_empty(&self) -> bool {
514 self.as_ref().is_empty()
515 }
516
517 /// Returns the number of bytes used by this fst.
518 #[inline]
519 pub fn size(&self) -> usize {
520 self.as_ref().size()
521 }
522
523 /// Attempts to verify this FST by computing its checksum.
524 ///
525 /// This will scan over all of the bytes in the underlying FST, so this
526 /// may be an expensive operation depending on the size of the FST.
527 ///
528 /// This returns an error in two cases:
529 ///
530 /// 1. When a checksum does not exist, which is the case for FSTs that were
531 /// produced by the `fst` crate before version `0.4`.
532 /// 2. When the checksum in the FST does not match the computed checksum
533 /// performed by this procedure.
534 #[inline]
535 pub fn verify(&self) -> Result<()> {
536 use crate::raw::crc32::CheckSummer;
537
538 let expected = match self.as_ref().meta.checksum {
539 None => return Err(Error::ChecksumMissing.into()),
540 Some(expected) => expected,
541 };
542 let mut summer = CheckSummer::new();
543 summer.update(&self.as_bytes()[..self.as_bytes().len() - 4]);
544 let got = summer.masked();
545 if expected == got {
546 return Ok(());
547 }
548 Err(Error::ChecksumMismatch { expected, got }.into())
549 }
550
551 /// Creates a new fst operation with this fst added to it.
552 ///
553 /// The `OpBuilder` type can be used to add additional fst streams
554 /// and perform set operations like union, intersection, difference and
555 /// symmetric difference on the keys of the fst. These set operations also
556 /// allow one to specify how conflicting values are merged in the stream.
557 #[inline]
558 #[cfg(feature = "alloc")]
559 pub fn op(&self) -> OpBuilder<'_> {
560 OpBuilder::new().add(self)
561 }
562
563 /// Returns true if and only if the `self` fst is disjoint with the fst
564 /// `stream`.
565 ///
566 /// `stream` must be a lexicographically ordered sequence of byte strings
567 /// with associated values.
568 #[inline]
569 #[cfg(feature = "alloc")]
570 pub fn is_disjoint<'f, I, S>(&self, stream: I) -> bool
571 where
572 I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
573 S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
574 {
575 self.op().add(stream).intersection().next().is_none()
576 }
577
578 /// Returns true if and only if the `self` fst is a subset of the fst
579 /// `stream`.
580 ///
581 /// `stream` must be a lexicographically ordered sequence of byte strings
582 /// with associated values.
583 #[inline]
584 #[cfg(feature = "alloc")]
585 pub fn is_subset<'f, I, S>(&self, stream: I) -> bool
586 where
587 I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
588 S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
589 {
590 let mut op = self.op().add(stream).intersection();
591 let mut count = 0;
592 while op.next().is_some() {
593 count += 1;
594 }
595 count == self.len()
596 }
597
598 /// Returns true if and only if the `self` fst is a superset of the fst
599 /// `stream`.
600 ///
601 /// `stream` must be a lexicographically ordered sequence of byte strings
602 /// with associated values.
603 #[inline]
604 #[cfg(feature = "alloc")]
605 pub fn is_superset<'f, I, S>(&self, stream: I) -> bool
606 where
607 I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
608 S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
609 {
610 let mut op = self.op().add(stream).union();
611 let mut count = 0;
612 while op.next().is_some() {
613 count += 1;
614 }
615 count == self.len()
616 }
617
618 /// Returns the underlying type of this fst.
619 ///
620 /// `FstType` is a convention used to indicate the type of the underlying
621 /// transducer.
622 ///
623 /// This crate reserves the range 0-255 (inclusive) but currently leaves
624 /// the meaning of 0-255 unspecified.
625 #[inline]
626 pub fn fst_type(&self) -> FstType {
627 self.as_ref().fst_type()
628 }
629
630 /// Returns the root node of this fst.
631 #[inline]
632 pub fn root(&self) -> Node<'_> {
633 self.as_ref().root()
634 }
635
636 /// Returns the node at the given address.
637 ///
638 /// Node addresses can be obtained by reading transitions on `Node` values.
639 #[inline]
640 pub fn node(&self, addr: CompiledAddr) -> Node<'_> {
641 self.as_ref().node(addr)
642 }
643
644 /// Returns a copy of the binary contents of this FST.
645 #[inline]
646 #[cfg(feature = "alloc")]
647 pub fn to_vec(&self) -> Vec<u8> {
648 self.as_ref().to_vec()
649 }
650
651 /// Returns the binary contents of this FST.
652 #[inline]
653 pub fn as_bytes(&self) -> &[u8] {
654 self.as_ref().as_bytes()
655 }
656
657 #[inline]
658 fn as_ref(&self) -> FstRef {
659 FstRef { meta: &self.meta, data: self.data.as_ref() }
660 }
661}
662
663impl<D> Fst<D> {
664 /// Returns the underlying data which constitutes the FST itself.
665 #[inline]
666 pub fn into_inner(self) -> D {
667 self.data
668 }
669
670 /// Returns a borrow to the underlying data which constitutes the FST itself.
671 #[inline]
672 pub fn as_inner(&self) -> &D {
673 &self.data
674 }
675
676 /// Maps the underlying data of the fst to another data type.
677 #[inline]
678 pub fn map_data<F, T>(self, mut f: F) -> Result<Fst<T>>
679 where
680 F: FnMut(D) -> T,
681 T: AsRef<[u8]>,
682 {
683 Fst::new(f(self.into_inner()))
684 }
685}
686
687#[cfg(feature = "alloc")]
688impl<'a, 'f, D: AsRef<[u8]>> IntoStreamer<'a> for &'f Fst<D> {
689 type Item = (&'a [u8], Output);
690 type Into = Stream<'f>;
691
692 #[inline]
693 #[cfg(feature = "alloc")]
694 fn into_stream(self) -> Stream<'f> {
695 StreamBuilder::new(self.as_ref(), AlwaysMatch).into_stream()
696 }
697}
698
699struct FstRef<'f> {
700 meta: &'f Meta,
701 data: &'f [u8],
702}
703
704impl<'f> FstRef<'f> {
705 #[inline]
706 fn get(&self, key: &[u8]) -> Option<Output> {
707 let mut node = self.root();
708 let mut out = Output::zero();
709 for &b in key {
710 node = match node.find_input(b) {
711 None => return None,
712 Some(i) => {
713 let t = node.transition(i);
714 out = out.cat(t.out);
715 self.node(t.addr)
716 }
717 }
718 }
719 if !node.is_final() {
720 None
721 } else {
722 Some(out.cat(node.final_output()))
723 }
724 }
725
726 #[inline]
727 fn contains_key(&self, key: &[u8]) -> bool {
728 let mut node = self.root();
729 for &b in key {
730 node = match node.find_input(b) {
731 None => return false,
732 Some(i) => self.node(node.transition_addr(i)),
733 }
734 }
735 node.is_final()
736 }
737
738 #[inline]
739 #[cfg(feature = "alloc")]
740 fn get_key_into(&self, mut value: u64, key: &mut Vec<u8>) -> bool {
741 let mut node = self.root();
742 while value != 0 || !node.is_final() {
743 let trans = node
744 .transitions()
745 .take_while(|t| t.out.value() <= value)
746 .last();
747 node = match trans {
748 None => return false,
749 Some(t) => {
750 value -= t.out.value();
751 key.push(t.inp);
752 self.node(t.addr)
753 }
754 };
755 }
756 true
757 }
758
759 #[inline]
760 fn len(&self) -> usize {
761 self.meta.len
762 }
763
764 #[inline]
765 fn is_empty(&self) -> bool {
766 self.meta.len == 0
767 }
768
769 #[inline]
770 fn size(&self) -> usize {
771 self.as_bytes().len()
772 }
773
774 #[inline]
775 fn fst_type(&self) -> FstType {
776 self.meta.ty
777 }
778
779 #[inline]
780 fn root_addr(&self) -> CompiledAddr {
781 self.meta.root_addr
782 }
783
784 #[inline]
785 fn root(&self) -> Node<'f> {
786 self.node(self.root_addr())
787 }
788
789 #[inline]
790 fn node(&self, addr: CompiledAddr) -> Node<'f> {
791 Node::new(self.meta.version, addr, self.as_bytes())
792 }
793
794 #[inline]
795 #[cfg(feature = "alloc")]
796 fn to_vec(&self) -> Vec<u8> {
797 self.as_bytes().to_vec()
798 }
799
800 #[inline]
801 fn as_bytes(&self) -> &'f [u8] {
802 self.data
803 }
804
805 #[inline]
806 #[cfg(feature = "alloc")]
807 fn empty_final_output(&self) -> Option<Output> {
808 let root = self.root();
809 if root.is_final() {
810 Some(root.final_output())
811 } else {
812 None
813 }
814 }
815}
816
817/// A builder for constructing range queries on streams.
818///
819/// Once all bounds are set, one should call `into_stream` to get a
820/// `Stream`.
821///
822/// Bounds are not additive. That is, if `ge` is called twice on the same
823/// builder, then the second setting wins.
824///
825/// The `A` type parameter corresponds to an optional automaton to filter
826/// the stream. By default, no filtering is done.
827///
828/// The `'f` lifetime parameter refers to the lifetime of the underlying fst.
829#[cfg(feature = "alloc")]
830pub struct StreamBuilder<'f, A = AlwaysMatch> {
831 fst: FstRef<'f>,
832 aut: A,
833 min: Bound,
834 max: Bound,
835}
836
837#[cfg(feature = "alloc")]
838impl<'f, A: Automaton> StreamBuilder<'f, A> {
839 fn new(fst: FstRef<'f>, aut: A) -> StreamBuilder<'f, A> {
840 StreamBuilder {
841 fst,
842 aut,
843 min: Bound::Unbounded,
844 max: Bound::Unbounded,
845 }
846 }
847
848 /// Specify a greater-than-or-equal-to bound.
849 #[cfg(feature = "alloc")]
850 pub fn ge<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
851 self.min = Bound::Included(bound.as_ref().to_owned());
852 self
853 }
854
855 /// Specify a greater-than bound.
856 #[cfg(feature = "alloc")]
857 pub fn gt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
858 self.min = Bound::Excluded(bound.as_ref().to_owned());
859 self
860 }
861
862 /// Specify a less-than-or-equal-to bound.
863 #[cfg(feature = "alloc")]
864 pub fn le<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
865 self.max = Bound::Included(bound.as_ref().to_owned());
866 self
867 }
868
869 /// Specify a less-than bound.
870 #[cfg(feature = "alloc")]
871 pub fn lt<T: AsRef<[u8]>>(mut self, bound: T) -> StreamBuilder<'f, A> {
872 self.max = Bound::Excluded(bound.as_ref().to_owned());
873 self
874 }
875}
876
877#[cfg(feature = "alloc")]
878impl<'a, 'f, A: Automaton> IntoStreamer<'a> for StreamBuilder<'f, A> {
879 type Item = (&'a [u8], Output);
880 type Into = Stream<'f, A>;
881
882 fn into_stream(self) -> Stream<'f, A> {
883 Stream::new(self.fst, self.aut, self.min, self.max)
884 }
885}
886
887/// A builder for constructing range queries on streams that include automaton
888/// states.
889///
890/// In general, one should use `StreamBuilder` unless you have a specific need
891/// for accessing the states of the underlying automaton that is being used to
892/// filter this stream.
893///
894/// Once all bounds are set, one should call `into_stream` to get a
895/// `Stream`.
896///
897/// Bounds are not additive. That is, if `ge` is called twice on the same
898/// builder, then the second setting wins.
899///
900/// The `A` type parameter corresponds to an optional automaton to filter
901/// the stream. By default, no filtering is done.
902///
903/// The `'f` lifetime parameter refers to the lifetime of the underlying fst.
904#[cfg(feature = "alloc")]
905pub struct StreamWithStateBuilder<'f, A = AlwaysMatch> {
906 fst: FstRef<'f>,
907 aut: A,
908 min: Bound,
909 max: Bound,
910}
911
912#[cfg(feature = "alloc")]
913impl<'f, A: Automaton> StreamWithStateBuilder<'f, A> {
914 fn new(fst: FstRef<'f>, aut: A) -> StreamWithStateBuilder<'f, A> {
915 StreamWithStateBuilder {
916 fst,
917 aut,
918 min: Bound::Unbounded,
919 max: Bound::Unbounded,
920 }
921 }
922
923 /// Specify a greater-than-or-equal-to bound.
924 pub fn ge<T: AsRef<[u8]>>(
925 mut self,
926 bound: T,
927 ) -> StreamWithStateBuilder<'f, A> {
928 self.min = Bound::Included(bound.as_ref().to_owned());
929 self
930 }
931
932 /// Specify a greater-than bound.
933 pub fn gt<T: AsRef<[u8]>>(
934 mut self,
935 bound: T,
936 ) -> StreamWithStateBuilder<'f, A> {
937 self.min = Bound::Excluded(bound.as_ref().to_owned());
938 self
939 }
940
941 /// Specify a less-than-or-equal-to bound.
942 pub fn le<T: AsRef<[u8]>>(
943 mut self,
944 bound: T,
945 ) -> StreamWithStateBuilder<'f, A> {
946 self.max = Bound::Included(bound.as_ref().to_owned());
947 self
948 }
949
950 /// Specify a less-than bound.
951 pub fn lt<T: AsRef<[u8]>>(
952 mut self,
953 bound: T,
954 ) -> StreamWithStateBuilder<'f, A> {
955 self.max = Bound::Excluded(bound.as_ref().to_owned());
956 self
957 }
958}
959
960#[cfg(feature = "alloc")]
961impl<'a, 'f, A: 'a + Automaton> IntoStreamer<'a>
962 for StreamWithStateBuilder<'f, A>
963where
964 A::State: Clone,
965{
966 type Item = (&'a [u8], Output, A::State);
967 type Into = StreamWithState<'f, A>;
968
969 fn into_stream(self) -> StreamWithState<'f, A> {
970 StreamWithState::new(self.fst, self.aut, self.min, self.max)
971 }
972}
973
974#[derive(Debug)]
975#[cfg(feature = "alloc")]
976enum Bound {
977 Included(Vec<u8>),
978 Excluded(Vec<u8>),
979 Unbounded,
980}
981
982#[cfg(feature = "alloc")]
983impl Bound {
984 #[inline]
985 fn exceeded_by(&self, inp: &[u8]) -> bool {
986 match *self {
987 Bound::Included(ref v) => inp > v,
988 Bound::Excluded(ref v) => inp >= v,
989 Bound::Unbounded => false,
990 }
991 }
992
993 #[inline]
994 fn is_empty(&self) -> bool {
995 match *self {
996 Bound::Included(ref v) => v.is_empty(),
997 Bound::Excluded(ref v) => v.is_empty(),
998 Bound::Unbounded => true,
999 }
1000 }
1001
1002 #[inline]
1003 fn is_inclusive(&self) -> bool {
1004 matches!(*self, Bound::Included(_) | Bound::Unbounded)
1005 }
1006}
1007
1008/// A lexicographically ordered stream of key-value pairs from an fst.
1009///
1010/// The `A` type parameter corresponds to an optional automaton to filter
1011/// the stream. By default, no filtering is done.
1012///
1013/// The `'f` lifetime parameter refers to the lifetime of the underlying fst.
1014#[cfg(feature = "alloc")]
1015pub struct Stream<'f, A: Automaton = AlwaysMatch>(StreamWithState<'f, A>);
1016
1017#[cfg(feature = "alloc")]
1018impl<'f, A: Automaton> Stream<'f, A> {
1019 fn new(fst: FstRef<'f>, aut: A, min: Bound, max: Bound) -> Stream<'f, A> {
1020 Stream(StreamWithState::new(fst, aut, min, max))
1021 }
1022
1023 /// Convert this stream into a vector of byte strings and outputs.
1024 ///
1025 /// Note that this creates a new allocation for every key in the stream.
1026 pub fn into_byte_vec(mut self) -> Vec<(Vec<u8>, u64)> {
1027 let mut vs = vec![];
1028 while let Some((k, v)) = self.next() {
1029 vs.push((k.to_vec(), v.value()));
1030 }
1031 vs
1032 }
1033
1034 /// Convert this stream into a vector of Unicode strings and outputs.
1035 ///
1036 /// If any key is not valid UTF-8, then iteration on the stream is stopped
1037 /// and a UTF-8 decoding error is returned.
1038 ///
1039 /// Note that this creates a new allocation for every key in the stream.
1040 pub fn into_str_vec(mut self) -> Result<Vec<(String, u64)>> {
1041 let mut vs = vec![];
1042 while let Some((k, v)) = self.next() {
1043 let k = String::from_utf8(k.to_vec()).map_err(Error::from)?;
1044 vs.push((k, v.value()));
1045 }
1046 Ok(vs)
1047 }
1048
1049 /// Convert this stream into a vector of byte strings.
1050 ///
1051 /// Note that this creates a new allocation for every key in the stream.
1052 pub fn into_byte_keys(mut self) -> Vec<Vec<u8>> {
1053 let mut vs = vec![];
1054 while let Some((k, _)) = self.next() {
1055 vs.push(k.to_vec());
1056 }
1057 vs
1058 }
1059
1060 /// Convert this stream into a vector of Unicode strings.
1061 ///
1062 /// If any key is not valid UTF-8, then iteration on the stream is stopped
1063 /// and a UTF-8 decoding error is returned.
1064 ///
1065 /// Note that this creates a new allocation for every key in the stream.
1066 pub fn into_str_keys(mut self) -> Result<Vec<String>> {
1067 let mut vs = vec![];
1068 while let Some((k, _)) = self.next() {
1069 let k = String::from_utf8(k.to_vec()).map_err(Error::from)?;
1070 vs.push(k);
1071 }
1072 Ok(vs)
1073 }
1074
1075 /// Convert this stream into a vector of outputs.
1076 pub fn into_values(mut self) -> Vec<u64> {
1077 let mut vs = vec![];
1078 while let Some((_, v)) = self.next() {
1079 vs.push(v.value());
1080 }
1081 vs
1082 }
1083}
1084
1085#[cfg(feature = "alloc")]
1086impl<'f, 'a, A: Automaton> Streamer<'a> for Stream<'f, A> {
1087 type Item = (&'a [u8], Output);
1088
1089 fn next(&'a mut self) -> Option<(&'a [u8], Output)> {
1090 self.0.next_with(|_| ()).map(|(key, out, ())| (key, out))
1091 }
1092}
1093
1094/// A lexicographically ordered stream of key-value-state triples from an fst
1095/// and an automaton.
1096///
1097/// The key-values are from the underyling FSTP while the states are from the
1098/// automaton.
1099///
1100/// The `A` type parameter corresponds to an optional automaton to filter
1101/// the stream. By default, no filtering is done.
1102///
1103/// The `'m` lifetime parameter refers to the lifetime of the underlying map.
1104#[cfg(feature = "alloc")]
1105pub struct StreamWithState<'f, A = AlwaysMatch>
1106where
1107 A: Automaton,
1108{
1109 fst: FstRef<'f>,
1110 aut: A,
1111 inp: Vec<u8>,
1112 empty_output: Option<Output>,
1113 stack: Vec<StreamState<'f, A::State>>,
1114 end_at: Bound,
1115}
1116
1117#[derive(Clone, Debug)]
1118#[cfg(feature = "alloc")]
1119struct StreamState<'f, S> {
1120 node: Node<'f>,
1121 trans: usize,
1122 out: Output,
1123 aut_state: S,
1124}
1125
1126#[cfg(feature = "alloc")]
1127impl<'f, A: Automaton> StreamWithState<'f, A> {
1128 fn new(
1129 fst: FstRef<'f>,
1130 aut: A,
1131 min: Bound,
1132 max: Bound,
1133 ) -> StreamWithState<'f, A> {
1134 let mut rdr = StreamWithState {
1135 fst,
1136 aut,
1137 inp: Vec::with_capacity(16),
1138 empty_output: None,
1139 stack: vec![],
1140 end_at: max,
1141 };
1142 rdr.seek_min(min);
1143 rdr
1144 }
1145
1146 /// Seeks the underlying stream such that the next key to be read is the
1147 /// smallest key in the underlying fst that satisfies the given minimum
1148 /// bound.
1149 ///
1150 /// This theoretically should be straight-forward, but we need to make
1151 /// sure our stack is correct, which includes accounting for automaton
1152 /// states.
1153 fn seek_min(&mut self, min: Bound) {
1154 if min.is_empty() {
1155 if min.is_inclusive() {
1156 self.empty_output = self.fst.empty_final_output();
1157 }
1158 self.stack = vec![StreamState {
1159 node: self.fst.root(),
1160 trans: 0,
1161 out: Output::zero(),
1162 aut_state: self.aut.start(),
1163 }];
1164 return;
1165 }
1166 let (key, inclusive) = match min {
1167 Bound::Excluded(ref min) => (min, false),
1168 Bound::Included(ref min) => (min, true),
1169 Bound::Unbounded => unreachable!(),
1170 };
1171 // At this point, we need to find the starting location of `min` in
1172 // the FST. However, as we search, we need to maintain a stack of
1173 // reader states so that the reader can pick up where we left off.
1174 // N.B. We do not necessarily need to stop in a final state, unlike
1175 // the one-off `find` method. For the example, the given bound might
1176 // not actually exist in the FST.
1177 let mut node = self.fst.root();
1178 let mut out = Output::zero();
1179 let mut aut_state = self.aut.start();
1180 for &b in key {
1181 match node.find_input(b) {
1182 Some(i) => {
1183 let t = node.transition(i);
1184 let prev_state = aut_state;
1185 aut_state = self.aut.accept(&prev_state, b);
1186 self.inp.push(b);
1187 self.stack.push(StreamState {
1188 node,
1189 trans: i + 1,
1190 out,
1191 aut_state: prev_state,
1192 });
1193 out = out.cat(t.out);
1194 node = self.fst.node(t.addr);
1195 }
1196 None => {
1197 // This is a little tricky. We're in this case if the
1198 // given bound is not a prefix of any key in the FST.
1199 // Since this is a minimum bound, we need to find the
1200 // first transition in this node that proceeds the current
1201 // input byte.
1202 self.stack.push(StreamState {
1203 node,
1204 trans: node
1205 .transitions()
1206 .position(|t| t.inp > b)
1207 .unwrap_or(node.len()),
1208 out,
1209 aut_state,
1210 });
1211 return;
1212 }
1213 }
1214 }
1215 if !self.stack.is_empty() {
1216 let last = self.stack.len() - 1;
1217 if inclusive {
1218 self.stack[last].trans -= 1;
1219 self.inp.pop();
1220 } else {
1221 let node = self.stack[last].node;
1222 let trans = self.stack[last].trans;
1223 self.stack.push(StreamState {
1224 node: self.fst.node(node.transition(trans - 1).addr),
1225 trans: 0,
1226 out,
1227 aut_state,
1228 });
1229 }
1230 }
1231 }
1232
1233 fn next_with<T>(
1234 &mut self,
1235 mut map: impl FnMut(&A::State) -> T,
1236 ) -> Option<(&[u8], Output, T)> {
1237 if let Some(out) = self.empty_output.take() {
1238 if self.end_at.exceeded_by(&[]) {
1239 self.stack.clear();
1240 return None;
1241 }
1242
1243 let start = self.aut.start();
1244 if self.aut.is_match(&start) {
1245 return Some((&[], out, map(&start)));
1246 }
1247 }
1248 while let Some(state) = self.stack.pop() {
1249 if state.trans >= state.node.len()
1250 || !self.aut.can_match(&state.aut_state)
1251 {
1252 if state.node.addr() != self.fst.root_addr() {
1253 self.inp.pop().unwrap();
1254 }
1255 continue;
1256 }
1257 let trans = state.node.transition(state.trans);
1258 let out = state.out.cat(trans.out);
1259 let next_state = self.aut.accept(&state.aut_state, trans.inp);
1260 let t = map(&next_state);
1261 let mut is_match = self.aut.is_match(&next_state);
1262 let next_node = self.fst.node(trans.addr);
1263 self.inp.push(trans.inp);
1264 if next_node.is_final() {
1265 if let Some(eof_state) = self.aut.accept_eof(&next_state) {
1266 is_match = self.aut.is_match(&eof_state);
1267 }
1268 }
1269 self.stack.push(StreamState { trans: state.trans + 1, ..state });
1270 self.stack.push(StreamState {
1271 node: next_node,
1272 trans: 0,
1273 out,
1274 aut_state: next_state,
1275 });
1276 if self.end_at.exceeded_by(&self.inp) {
1277 // We are done, forever.
1278 self.stack.clear();
1279 return None;
1280 }
1281 if next_node.is_final() && is_match {
1282 return Some((
1283 &self.inp,
1284 out.cat(next_node.final_output()),
1285 t,
1286 ));
1287 }
1288 }
1289 None
1290 }
1291}
1292
1293#[cfg(feature = "alloc")]
1294impl<'a, 'f, A: 'a + Automaton> Streamer<'a> for StreamWithState<'f, A>
1295where
1296 A::State: Clone,
1297{
1298 type Item = (&'a [u8], Output, A::State);
1299
1300 fn next(&'a mut self) -> Option<(&'a [u8], Output, A::State)> {
1301 self.next_with(Clone::clone)
1302 }
1303}
1304
1305/// An output is a value that is associated with a key in a finite state
1306/// transducer.
1307///
1308/// Note that outputs must satisfy an algebra. Namely, it must have an additive
1309/// identity and the following binary operations defined: `prefix`,
1310/// `concatenation` and `subtraction`. `prefix` and `concatenation` are
1311/// commutative while `subtraction` is not. `subtraction` is only defined on
1312/// pairs of operands where the first operand is greater than or equal to the
1313/// second operand.
1314///
1315/// Currently, output values must be `u64`. However, in theory, an output value
1316/// can be anything that satisfies the above algebra. Future versions of this
1317/// crate may make outputs generic on this algebra.
1318#[derive(Copy, Clone, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
1319pub struct Output(u64);
1320
1321impl Output {
1322 /// Create a new output from a `u64`.
1323 #[inline]
1324 #[must_use]
1325 pub fn new(v: u64) -> Output {
1326 Output(v)
1327 }
1328
1329 /// Create a zero output.
1330 #[inline]
1331 #[must_use]
1332 pub fn zero() -> Output {
1333 Output(0)
1334 }
1335
1336 /// Retrieve the value inside this output.
1337 #[inline]
1338 #[must_use]
1339 pub fn value(self) -> u64 {
1340 self.0
1341 }
1342
1343 /// Returns true if this is a zero output.
1344 #[inline]
1345 #[must_use]
1346 pub fn is_zero(self) -> bool {
1347 self.0 == 0
1348 }
1349
1350 /// Returns the prefix of this output and `o`.
1351 #[inline]
1352 #[must_use]
1353 pub fn prefix(self, o: Output) -> Output {
1354 Output(cmp::min(self.0, o.0))
1355 }
1356
1357 /// Returns the concatenation of this output and `o`.
1358 #[inline]
1359 #[must_use]
1360 pub fn cat(self, o: Output) -> Output {
1361 Output(self.0 + o.0)
1362 }
1363}
1364
1365impl core::ops::Sub for Output {
1366 type Output = Output;
1367
1368 #[inline]
1369 fn sub(self, rhs: Self) -> Self::Output {
1370 Self(
1371 self.0
1372 .checked_sub(rhs.0)
1373 .expect("BUG: underflow subtraction not allowed"),
1374 )
1375 }
1376}
1377
1378/// A transition from one note to another.
1379#[derive(Copy, Clone, Hash, Eq, PartialEq)]
1380pub struct Transition {
1381 /// The byte input associated with this transition.
1382 pub inp: u8,
1383 /// The output associated with this transition.
1384 pub out: Output,
1385 /// The address of the node that this transition points to.
1386 pub addr: CompiledAddr,
1387}
1388
1389impl Default for Transition {
1390 #[inline]
1391 fn default() -> Transition {
1392 Transition { inp: 0, out: Output::zero(), addr: NONE_ADDRESS }
1393 }
1394}
1395
1396impl fmt::Debug for Transition {
1397 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1398 if self.out.is_zero() {
1399 write!(f, "{} -> {}", self.inp as char, self.addr)
1400 } else {
1401 write!(
1402 f,
1403 "({}, {}) -> {}",
1404 self.inp as char,
1405 self.out.value(),
1406 self.addr
1407 )
1408 }
1409 }
1410}
1411
1412#[inline]
1413#[cfg(target_pointer_width = "64")]
1414#[allow(clippy::cast_possible_truncation)]
1415fn u64_to_usize(n: u64) -> usize {
1416 n as usize
1417}
1418
1419#[inline]
1420#[cfg(not(target_pointer_width = "64"))]
1421fn u64_to_usize(n: u64) -> usize {
1422 if n > core::usize::MAX as u64 {
1423 panic!(
1424 "\
1425Cannot convert node address {} to a pointer sized variable. If this FST
1426is very large and was generated on a system with a larger pointer size
1427than this system, then it is not possible to read this FST on this
1428system.",
1429 n
1430 );
1431 }
1432 n as usize
1433}