general_sam/
lib.rs

1//! This crate provides an implementation of a general suffix automaton.
2//!
3//! ```mermaid
4//! flowchart LR
5//!   init((ε))
6//!   a((a))
7//!   b((b))
8//!   ab((ab))
9//!   bc(((bc)))
10//!   abc((abc))
11//!   abcb((abcb))
12//!   abcbc(((abcbc)))
13//!
14//!   init -- a --> a
15//!   init -- b --> b
16//!   a -- b --> ab
17//!   b -- c --> bc
18//!   init -- c --> bc
19//!   ab -- c --> abc
20//!   bc -- b --> abcb
21//!   abc -- b --> abcb
22//!   abcb -- c --> abcbc
23//! ```
24//!
25//! > The suffix automaton of abcbc.
26//!
27//! # Examples
28//!
29//! ```rust
30//! use general_sam::{GeneralSam, BTreeTransTable};
31//!
32//! let sam = GeneralSam::<BTreeTransTable<_>>::from_bytes("abcbc");
33//!
34//! // "cbc" is a suffix of "abcbc"
35//! assert!(sam.get_root_state().feed_bytes("cbc").is_accepting());
36//!
37//! // "bcb" is not a suffix of "abcbc"
38//! assert!(!sam.get_root_state().feed_bytes("bcb").is_accepting());
39//! ```
40//!
41//! ```rust
42//! use general_sam::{GeneralSam, BTreeTransTable};
43//!
44//! let sam = GeneralSam::<BTreeTransTable<_>>::from_chars("abcbc");
45//!
46//! let mut state = sam.get_root_state();
47//!
48//! // "b" is not a suffix but at least a substring of "abcbc"
49//! state.feed_chars("b");
50//! assert!(!state.is_accepting());
51//!
52//! // "bc" is a suffix of "abcbc"
53//! state.feed_chars("c");
54//! assert!(state.is_accepting());
55//!
56//! // "bcbc" is a suffix of "abcbc"
57//! state.feed_chars("bc");
58//! assert!(state.is_accepting());
59//!
60//! // "bcbcbc" is not a substring, much less a suffix of "abcbc"
61//! state.feed_chars("bc");
62//! assert!(!state.is_accepting() && state.is_nil());
63//! ```
64//!
65//! ```rust
66//! # #[cfg(feature = "trie")] {
67//! use general_sam::{GeneralSam, Trie, BTreeTransTable};
68//!
69//! let mut trie = Trie::<BTreeTransTable<_>>::default();
70//! trie.insert("hello".chars());
71//! trie.insert("Chielo".chars());
72//!
73//! let sam = GeneralSam::<BTreeTransTable<_>>::from_trie(trie.get_root_state());
74//!
75//! assert!(sam.get_root_state().feed_chars("lo").is_accepting());
76//! assert!(sam.get_root_state().feed_chars("ello").is_accepting());
77//! assert!(sam.get_root_state().feed_chars("elo").is_accepting());
78//!
79//! assert!(!sam.get_root_state().feed_chars("el").is_accepting());
80//! assert!(!sam.get_root_state().feed_chars("el").is_nil());
81//!
82//! assert!(!sam.get_root_state().feed_chars("bye").is_accepting());
83//! assert!(sam.get_root_state().feed_chars("bye").is_nil());
84//! # }
85//! ```
86//!
87//! # References
88//!
89//! - [Mehryar Mohri, Pedro Moreno, Eugene Weinstein.
90//!   General suffix automaton construction algorithm and space bounds.][paper]
91//! - 刘研绎《后缀自动机在字典树上的拓展》
92//! - [广义后缀自动机 - OI Wiki][general-sam-oi-wiki]
93//!
94//! [paper]: https://doi.org/10.1016/j.tcs.2009.03.034
95//! [general-sam-oi-wiki]: https://oi-wiki.org/string/general-sam/
96pub mod sam;
97pub mod table;
98pub mod trie_alike;
99
100pub use {
101    sam::{
102        GeneralSam, GeneralSamNode, GeneralSamNodeID, GeneralSamState, SAM_NIL_NODE_ID,
103        SAM_ROOT_NODE_ID,
104    },
105    table::{
106        BTreeTransTable, BoxBisectTable, ConstructiveTransitionTable, HashTransTable,
107        SmallAlphabet, TransitionTable, VecBisectTable, WholeAlphabetTable,
108    },
109    trie_alike::{IterAsChain, TravelEvent, TrieNodeAlike},
110};
111
112#[cfg(feature = "trie")]
113pub mod trie;
114#[cfg(feature = "trie")]
115pub use trie::{TRIE_NIL_NODE_ID, TRIE_ROOT_NODE_ID, Trie, TrieNode, TrieNodeID, TrieState};
116
117#[cfg(feature = "utils")]
118pub mod utils;
119#[cfg(feature = "utils")]
120pub use utils::{rope, suffixwise, tokenize, tokenize::GreedyTokenizer};
121
122#[cfg(test)]
123mod tests;
124
125#[cfg(doctest)]
126mod _doctest_readme {
127    #[doc = include_str!("../README.md")]
128    struct ReadMe;
129}