general_sam/sam/
mod.rs

1//! A general suffix automaton implementation.
2
3mod state;
4pub use state::GeneralSamState;
5
6use std::convert::Infallible;
7
8use crate::{
9    ConstructiveTransitionTable, IterAsChain, TransitionTable, TravelEvent, TrieNodeAlike,
10};
11
12pub type GeneralSamNodeID = usize;
13pub const SAM_NIL_NODE_ID: GeneralSamNodeID = 0;
14pub const SAM_ROOT_NODE_ID: GeneralSamNodeID = 1;
15
16#[derive(Clone, Debug)]
17pub struct GeneralSamNode<TransTable: TransitionTable> {
18    trans: TransTable,
19    len: usize,
20    link: GeneralSamNodeID,
21    accept: bool,
22}
23
24/// A general suffix automaton.
25#[derive(Clone, Debug)]
26pub struct GeneralSam<TransTable: TransitionTable> {
27    node_pool: Vec<GeneralSamNode<TransTable>>,
28    topo_and_suf_len_sorted_order: Vec<GeneralSamNodeID>,
29}
30
31impl<TransTable: ConstructiveTransitionTable> GeneralSamNode<TransTable> {
32    fn new(accept: bool, len: usize, link: GeneralSamNodeID) -> Self {
33        Self {
34            trans: Default::default(),
35            accept,
36            len,
37            link,
38        }
39    }
40}
41
42impl<TransTable: TransitionTable> GeneralSamNode<TransTable> {
43    pub fn is_accepting(&self) -> bool {
44        self.accept
45    }
46
47    pub fn max_suffix_len(&self) -> usize {
48        self.len
49    }
50
51    pub fn get_suffix_parent_id(&self) -> GeneralSamNodeID {
52        self.link
53    }
54
55    pub fn get_trans(&self) -> &TransTable {
56        &self.trans
57    }
58
59    fn alter_trans_table<NewTableType: TransitionTable<KeyType = TransTable::KeyType>>(
60        &self,
61    ) -> GeneralSamNode<NewTableType> {
62        GeneralSamNode {
63            trans: NewTableType::from_kv_iter(self.trans.iter()),
64            accept: self.accept,
65            len: self.len,
66            link: self.link,
67        }
68    }
69}
70
71impl<TransTable: ConstructiveTransitionTable<KeyType = u8>> GeneralSam<TransTable> {
72    pub fn from_bytes<S: AsRef<[u8]>>(s: S) -> Self {
73        let iter = IterAsChain::from(s.as_ref().iter().copied());
74        Self::from_trie(iter)
75    }
76}
77
78impl<TransTable: ConstructiveTransitionTable<KeyType = u32>> GeneralSam<TransTable> {
79    pub fn from_utf32<S: AsRef<[u32]>>(s: S) -> Self {
80        let iter = IterAsChain::from(s.as_ref().iter().copied());
81        Self::from_trie(iter)
82    }
83}
84
85impl<TransTable: ConstructiveTransitionTable<KeyType = char>> GeneralSam<TransTable> {
86    pub fn from_chars<S: AsRef<str>>(s: S) -> Self {
87        let iter = IterAsChain::from(s.as_ref().chars());
88        Self::from_trie(iter)
89    }
90}
91
92impl<TransTable: ConstructiveTransitionTable> Default for GeneralSam<TransTable> {
93    fn default() -> Self {
94        Self {
95            node_pool: vec![
96                GeneralSamNode::new(false, 0, SAM_NIL_NODE_ID),
97                GeneralSamNode::new(true, 0, SAM_NIL_NODE_ID),
98            ],
99            topo_and_suf_len_sorted_order: Default::default(),
100        }
101    }
102}
103
104impl<TransTable: TransitionTable> GeneralSam<TransTable> {
105    pub fn num_of_nodes(&self) -> usize {
106        self.node_pool.len()
107    }
108
109    pub fn get_root_node(&self) -> &GeneralSamNode<TransTable> {
110        self.get_node(SAM_ROOT_NODE_ID).unwrap()
111    }
112
113    pub fn get_node(&self, node_id: GeneralSamNodeID) -> Option<&GeneralSamNode<TransTable>> {
114        self.node_pool.get(node_id)
115    }
116
117    pub fn get_root_state(&self) -> GeneralSamState<TransTable, &GeneralSam<TransTable>> {
118        self.get_state(SAM_ROOT_NODE_ID)
119    }
120
121    pub fn get_state(
122        &self,
123        node_id: GeneralSamNodeID,
124    ) -> GeneralSamState<TransTable, &GeneralSam<TransTable>> {
125        if node_id < self.node_pool.len() {
126            GeneralSamState::new(self, node_id)
127        } else {
128            GeneralSamState::new(self, SAM_NIL_NODE_ID)
129        }
130    }
131
132    /// Returns topological sorted, maximum suffix length sorted
133    /// and suffix parent depth sorted node id sequence,
134    /// which is generated by topological sorting with a queue.
135    pub fn get_topo_and_suf_len_sorted_node_ids(&self) -> &Vec<GeneralSamNodeID> {
136        &self.topo_and_suf_len_sorted_order
137    }
138
139    pub fn alter_trans_table<NewTableType: TransitionTable<KeyType = TransTable::KeyType>>(
140        &self,
141    ) -> GeneralSam<NewTableType> {
142        GeneralSam {
143            node_pool: self
144                .node_pool
145                .iter()
146                .map(|x| x.alter_trans_table())
147                .collect(),
148            topo_and_suf_len_sorted_order: self.topo_and_suf_len_sorted_order.clone(),
149        }
150    }
151
152    pub fn alter_trans_table_into<NewTableType: TransitionTable<KeyType = TransTable::KeyType>>(
153        self,
154    ) -> GeneralSam<NewTableType> {
155        GeneralSam {
156            node_pool: self
157                .node_pool
158                .iter()
159                .map(|x| x.alter_trans_table())
160                .collect(),
161            topo_and_suf_len_sorted_order: self.topo_and_suf_len_sorted_order,
162        }
163    }
164}
165
166impl<TransTable: ConstructiveTransitionTable> GeneralSam<TransTable> {
167    pub fn from_trie<TN: TrieNodeAlike>(node: TN) -> Self
168    where
169        TN::InnerType: Into<TransTable::KeyType>,
170    {
171        let mut sam = Self::default();
172
173        let accept_empty_string = node.is_accepting();
174
175        sam.build_with_trie(node);
176        sam.topo_sort_with_queue();
177        sam.update_accepting();
178
179        sam.node_pool[SAM_ROOT_NODE_ID].accept = accept_empty_string;
180
181        sam
182    }
183
184    fn build_with_trie<TN: TrieNodeAlike>(&mut self, node: TN)
185    where
186        TN::InnerType: Into<TransTable::KeyType>,
187    {
188        node.bfs_travel(|event| -> Result<GeneralSamNodeID, Infallible> {
189            match event {
190                TravelEvent::PushRoot(_) => Ok(SAM_ROOT_NODE_ID),
191                TravelEvent::Push(cur_tn, cur_node_id, key) => {
192                    Ok(self.insert_node_trans(*cur_node_id, key, cur_tn.is_accepting()))
193                }
194                TravelEvent::Pop(_, cur_node_id) => Ok(cur_node_id),
195            }
196        })
197        .unwrap();
198    }
199
200    fn topo_sort_with_queue(&mut self) {
201        let mut in_degree: Vec<usize> = vec![0; self.num_of_nodes()];
202
203        self.node_pool.iter().for_each(|node| {
204            node.trans.transitions().for_each(|v| {
205                in_degree[*v] += 1;
206            });
207        });
208        assert!(in_degree[SAM_ROOT_NODE_ID] == 0);
209
210        self.topo_and_suf_len_sorted_order
211            .reserve(self.node_pool.len());
212
213        self.topo_and_suf_len_sorted_order.push(SAM_ROOT_NODE_ID);
214        let mut head = 0;
215        while head < self.topo_and_suf_len_sorted_order.len() {
216            let u_id = self.topo_and_suf_len_sorted_order[head];
217            head += 1;
218            self.node_pool[u_id].trans.transitions().for_each(|v_id| {
219                in_degree[*v_id] -= 1;
220                if in_degree[*v_id] == 0 {
221                    self.topo_and_suf_len_sorted_order.push(*v_id);
222                }
223            });
224        }
225    }
226
227    fn update_accepting(&mut self) {
228        self.topo_and_suf_len_sorted_order
229            .iter()
230            .rev()
231            .for_each(|node_id| {
232                let link_id = self.node_pool[*node_id].link;
233                self.node_pool[link_id].accept |= self.node_pool[*node_id].accept;
234            });
235        self.node_pool[SAM_NIL_NODE_ID].accept = false;
236    }
237
238    fn alloc_node(&mut self, node: GeneralSamNode<TransTable>) -> GeneralSamNodeID {
239        let id = self.node_pool.len();
240        self.node_pool.push(node);
241        id
242    }
243
244    fn insert_node_trans<Key: Into<TransTable::KeyType>>(
245        &mut self,
246        last_node_id: GeneralSamNodeID,
247        key: Key,
248        accept: bool,
249    ) -> GeneralSamNodeID {
250        let key: TransTable::KeyType = key.into();
251
252        let new_node_id = {
253            let last_node = &self.node_pool[last_node_id];
254            self.alloc_node(GeneralSamNode::new(
255                accept,
256                last_node.len + 1,
257                SAM_NIL_NODE_ID,
258            ))
259        };
260
261        let mut p_node_id = last_node_id;
262        while p_node_id != SAM_NIL_NODE_ID {
263            let p_node = &mut self.node_pool[p_node_id];
264            if p_node.trans.contains_key(&key) {
265                break;
266            }
267            p_node.trans.insert(key.clone(), new_node_id);
268            p_node_id = p_node.link;
269        }
270
271        if p_node_id == SAM_NIL_NODE_ID {
272            self.node_pool[new_node_id].link = SAM_ROOT_NODE_ID;
273            return new_node_id;
274        }
275
276        let q_node_id = *self.node_pool[p_node_id].trans.get(&key).unwrap();
277        let q_node = &self.node_pool[q_node_id];
278        if q_node.len == self.node_pool[p_node_id].len + 1 {
279            self.node_pool[new_node_id].link = q_node_id;
280            return new_node_id;
281        }
282
283        let clone_node_id = self.alloc_node(q_node.clone());
284        self.node_pool[clone_node_id].len = self.node_pool[p_node_id].len + 1;
285        while p_node_id != SAM_NIL_NODE_ID {
286            let p_node = &mut self.node_pool[p_node_id];
287            if let Some(t_node_id) = p_node.trans.get_mut(&key)
288                && *t_node_id == q_node_id
289            {
290                *t_node_id = clone_node_id;
291                p_node_id = p_node.link;
292                continue;
293            }
294            break;
295        }
296
297        self.node_pool[new_node_id].link = clone_node_id;
298        self.node_pool[q_node_id].link = clone_node_id;
299
300        new_node_id
301    }
302}