1mod state;
4pub use state::GeneralSamState;
5
6use std::convert::Infallible;
7
8use crate::{
9 ConstructiveTransitionTable, IterAsChain, TransitionTable, TravelEvent, TrieNodeAlike,
10};
11
12pub type GeneralSamNodeID = usize;
13pub const SAM_NIL_NODE_ID: GeneralSamNodeID = 0;
14pub const SAM_ROOT_NODE_ID: GeneralSamNodeID = 1;
15
16#[derive(Clone, Debug)]
17pub struct GeneralSamNode<TransTable: TransitionTable> {
18 trans: TransTable,
19 len: usize,
20 link: GeneralSamNodeID,
21 accept: bool,
22}
23
24#[derive(Clone, Debug)]
26pub struct GeneralSam<TransTable: TransitionTable> {
27 node_pool: Vec<GeneralSamNode<TransTable>>,
28 topo_and_suf_len_sorted_order: Vec<GeneralSamNodeID>,
29}
30
31impl<TransTable: ConstructiveTransitionTable> GeneralSamNode<TransTable> {
32 fn new(accept: bool, len: usize, link: GeneralSamNodeID) -> Self {
33 Self {
34 trans: Default::default(),
35 accept,
36 len,
37 link,
38 }
39 }
40}
41
42impl<TransTable: TransitionTable> GeneralSamNode<TransTable> {
43 pub fn is_accepting(&self) -> bool {
44 self.accept
45 }
46
47 pub fn max_suffix_len(&self) -> usize {
48 self.len
49 }
50
51 pub fn get_suffix_parent_id(&self) -> GeneralSamNodeID {
52 self.link
53 }
54
55 pub fn get_trans(&self) -> &TransTable {
56 &self.trans
57 }
58
59 fn alter_trans_table<NewTableType: TransitionTable<KeyType = TransTable::KeyType>>(
60 &self,
61 ) -> GeneralSamNode<NewTableType> {
62 GeneralSamNode {
63 trans: NewTableType::from_kv_iter(self.trans.iter()),
64 accept: self.accept,
65 len: self.len,
66 link: self.link,
67 }
68 }
69}
70
71impl<TransTable: ConstructiveTransitionTable<KeyType = u8>> GeneralSam<TransTable> {
72 pub fn from_bytes<S: AsRef<[u8]>>(s: S) -> Self {
73 let iter = IterAsChain::from(s.as_ref().iter().copied());
74 Self::from_trie(iter)
75 }
76}
77
78impl<TransTable: ConstructiveTransitionTable<KeyType = u32>> GeneralSam<TransTable> {
79 pub fn from_utf32<S: AsRef<[u32]>>(s: S) -> Self {
80 let iter = IterAsChain::from(s.as_ref().iter().copied());
81 Self::from_trie(iter)
82 }
83}
84
85impl<TransTable: ConstructiveTransitionTable<KeyType = char>> GeneralSam<TransTable> {
86 pub fn from_chars<S: AsRef<str>>(s: S) -> Self {
87 let iter = IterAsChain::from(s.as_ref().chars());
88 Self::from_trie(iter)
89 }
90}
91
92impl<TransTable: ConstructiveTransitionTable> Default for GeneralSam<TransTable> {
93 fn default() -> Self {
94 Self {
95 node_pool: vec![
96 GeneralSamNode::new(false, 0, SAM_NIL_NODE_ID),
97 GeneralSamNode::new(true, 0, SAM_NIL_NODE_ID),
98 ],
99 topo_and_suf_len_sorted_order: Default::default(),
100 }
101 }
102}
103
104impl<TransTable: TransitionTable> GeneralSam<TransTable> {
105 pub fn num_of_nodes(&self) -> usize {
106 self.node_pool.len()
107 }
108
109 pub fn get_root_node(&self) -> &GeneralSamNode<TransTable> {
110 self.get_node(SAM_ROOT_NODE_ID).unwrap()
111 }
112
113 pub fn get_node(&self, node_id: GeneralSamNodeID) -> Option<&GeneralSamNode<TransTable>> {
114 self.node_pool.get(node_id)
115 }
116
117 pub fn get_root_state(&self) -> GeneralSamState<TransTable, &GeneralSam<TransTable>> {
118 self.get_state(SAM_ROOT_NODE_ID)
119 }
120
121 pub fn get_state(
122 &self,
123 node_id: GeneralSamNodeID,
124 ) -> GeneralSamState<TransTable, &GeneralSam<TransTable>> {
125 if node_id < self.node_pool.len() {
126 GeneralSamState::new(self, node_id)
127 } else {
128 GeneralSamState::new(self, SAM_NIL_NODE_ID)
129 }
130 }
131
132 pub fn get_topo_and_suf_len_sorted_node_ids(&self) -> &Vec<GeneralSamNodeID> {
136 &self.topo_and_suf_len_sorted_order
137 }
138
139 pub fn alter_trans_table<NewTableType: TransitionTable<KeyType = TransTable::KeyType>>(
140 &self,
141 ) -> GeneralSam<NewTableType> {
142 GeneralSam {
143 node_pool: self
144 .node_pool
145 .iter()
146 .map(|x| x.alter_trans_table())
147 .collect(),
148 topo_and_suf_len_sorted_order: self.topo_and_suf_len_sorted_order.clone(),
149 }
150 }
151
152 pub fn alter_trans_table_into<NewTableType: TransitionTable<KeyType = TransTable::KeyType>>(
153 self,
154 ) -> GeneralSam<NewTableType> {
155 GeneralSam {
156 node_pool: self
157 .node_pool
158 .iter()
159 .map(|x| x.alter_trans_table())
160 .collect(),
161 topo_and_suf_len_sorted_order: self.topo_and_suf_len_sorted_order,
162 }
163 }
164}
165
166impl<TransTable: ConstructiveTransitionTable> GeneralSam<TransTable> {
167 pub fn from_trie<TN: TrieNodeAlike>(node: TN) -> Self
168 where
169 TN::InnerType: Into<TransTable::KeyType>,
170 {
171 let mut sam = Self::default();
172
173 let accept_empty_string = node.is_accepting();
174
175 sam.build_with_trie(node);
176 sam.topo_sort_with_queue();
177 sam.update_accepting();
178
179 sam.node_pool[SAM_ROOT_NODE_ID].accept = accept_empty_string;
180
181 sam
182 }
183
184 fn build_with_trie<TN: TrieNodeAlike>(&mut self, node: TN)
185 where
186 TN::InnerType: Into<TransTable::KeyType>,
187 {
188 node.bfs_travel(|event| -> Result<GeneralSamNodeID, Infallible> {
189 match event {
190 TravelEvent::PushRoot(_) => Ok(SAM_ROOT_NODE_ID),
191 TravelEvent::Push(cur_tn, cur_node_id, key) => {
192 Ok(self.insert_node_trans(*cur_node_id, key, cur_tn.is_accepting()))
193 }
194 TravelEvent::Pop(_, cur_node_id) => Ok(cur_node_id),
195 }
196 })
197 .unwrap();
198 }
199
200 fn topo_sort_with_queue(&mut self) {
201 let mut in_degree: Vec<usize> = vec![0; self.num_of_nodes()];
202
203 self.node_pool.iter().for_each(|node| {
204 node.trans.transitions().for_each(|v| {
205 in_degree[*v] += 1;
206 });
207 });
208 assert!(in_degree[SAM_ROOT_NODE_ID] == 0);
209
210 self.topo_and_suf_len_sorted_order
211 .reserve(self.node_pool.len());
212
213 self.topo_and_suf_len_sorted_order.push(SAM_ROOT_NODE_ID);
214 let mut head = 0;
215 while head < self.topo_and_suf_len_sorted_order.len() {
216 let u_id = self.topo_and_suf_len_sorted_order[head];
217 head += 1;
218 self.node_pool[u_id].trans.transitions().for_each(|v_id| {
219 in_degree[*v_id] -= 1;
220 if in_degree[*v_id] == 0 {
221 self.topo_and_suf_len_sorted_order.push(*v_id);
222 }
223 });
224 }
225 }
226
227 fn update_accepting(&mut self) {
228 self.topo_and_suf_len_sorted_order
229 .iter()
230 .rev()
231 .for_each(|node_id| {
232 let link_id = self.node_pool[*node_id].link;
233 self.node_pool[link_id].accept |= self.node_pool[*node_id].accept;
234 });
235 self.node_pool[SAM_NIL_NODE_ID].accept = false;
236 }
237
238 fn alloc_node(&mut self, node: GeneralSamNode<TransTable>) -> GeneralSamNodeID {
239 let id = self.node_pool.len();
240 self.node_pool.push(node);
241 id
242 }
243
244 fn insert_node_trans<Key: Into<TransTable::KeyType>>(
245 &mut self,
246 last_node_id: GeneralSamNodeID,
247 key: Key,
248 accept: bool,
249 ) -> GeneralSamNodeID {
250 let key: TransTable::KeyType = key.into();
251
252 let new_node_id = {
253 let last_node = &self.node_pool[last_node_id];
254 self.alloc_node(GeneralSamNode::new(
255 accept,
256 last_node.len + 1,
257 SAM_NIL_NODE_ID,
258 ))
259 };
260
261 let mut p_node_id = last_node_id;
262 while p_node_id != SAM_NIL_NODE_ID {
263 let p_node = &mut self.node_pool[p_node_id];
264 if p_node.trans.contains_key(&key) {
265 break;
266 }
267 p_node.trans.insert(key.clone(), new_node_id);
268 p_node_id = p_node.link;
269 }
270
271 if p_node_id == SAM_NIL_NODE_ID {
272 self.node_pool[new_node_id].link = SAM_ROOT_NODE_ID;
273 return new_node_id;
274 }
275
276 let q_node_id = *self.node_pool[p_node_id].trans.get(&key).unwrap();
277 let q_node = &self.node_pool[q_node_id];
278 if q_node.len == self.node_pool[p_node_id].len + 1 {
279 self.node_pool[new_node_id].link = q_node_id;
280 return new_node_id;
281 }
282
283 let clone_node_id = self.alloc_node(q_node.clone());
284 self.node_pool[clone_node_id].len = self.node_pool[p_node_id].len + 1;
285 while p_node_id != SAM_NIL_NODE_ID {
286 let p_node = &mut self.node_pool[p_node_id];
287 if let Some(t_node_id) = p_node.trans.get_mut(&key)
288 && *t_node_id == q_node_id
289 {
290 *t_node_id = clone_node_id;
291 p_node_id = p_node.link;
292 continue;
293 }
294 break;
295 }
296
297 self.node_pool[new_node_id].link = clone_node_id;
298 self.node_pool[q_node_id].link = clone_node_id;
299
300 new_node_id
301 }
302}