general_sam/utils/
suffixwise.rs1use std::{collections::LinkedList, convert::Infallible, ops::Deref};
4
5use crate::{
6 GeneralSam, GeneralSamState, SAM_NIL_NODE_ID, SAM_ROOT_NODE_ID, TransitionTable, TravelEvent,
7 TrieNodeAlike,
8 rope::{Rope, RopeBase, RopeData, RopeUntaggedInner, TreapBasedRopeBase},
9};
10
11#[derive(Clone, Default, Debug)]
12pub struct SuffixwiseData<Inner: RopeData + Default> {
13 data: Rope<Inner>,
14 min_suf_len: usize,
15 max_suf_len: usize,
16}
17
18impl<Inner: RopeData + Default> SuffixwiseData<Inner> {
19 pub fn get_rope(&self) -> &Rope<Inner> {
20 &self.data
21 }
22
23 pub fn get_min_suf_len(&self) -> usize {
24 self.min_suf_len
25 }
26
27 pub fn get_max_suf_len(&self) -> usize {
28 self.max_suf_len
29 }
30
31 pub fn map<NewInner: RopeData + Default, F: FnOnce(&Rope<Inner>) -> Rope<NewInner>>(
32 &self,
33 f: F,
34 ) -> SuffixwiseData<NewInner> {
35 SuffixwiseData {
36 data: f(&self.data),
37 min_suf_len: self.min_suf_len,
38 max_suf_len: self.max_suf_len,
39 }
40 }
41
42 pub fn get(&self, suf_len: usize) -> Option<Inner> {
43 if self.data.is_empty()
44 || self.max_suf_len == 0
45 || self.min_suf_len == 0
46 || suf_len < self.min_suf_len
47 || suf_len > self.max_suf_len
48 {
49 return None;
50 }
51 Some(
52 self.data
53 .query(suf_len - self.min_suf_len)
54 .expect("invalid suffixwise data")
55 .as_ref()
56 .deref()
57 .to_owned(),
58 )
59 }
60
61 pub fn build_from_sam<
62 TransTable: TransitionTable,
63 Iter: IntoIterator<Item = (usize, Inner)>,
64 FInit: FnMut(usize) -> Iter,
65 >(
66 sam: &GeneralSam<TransTable>,
67 mut f_init: FInit,
68 ) -> Vec<Self> {
69 let mut res = vec![Self::default(); sam.num_of_nodes()];
70 for node_id in sam.get_topo_and_suf_len_sorted_node_ids().iter().copied() {
71 assert_ne!(node_id, SAM_NIL_NODE_ID);
72
73 let node = sam.get_node(node_id).expect("invalid GeneralSam");
74 let node_data = res
75 .get_mut(node_id)
76 .unwrap_or_else(|| panic!("invalid node id: {}", node_id));
77
78 node_data.max_suf_len = node.max_suffix_len();
79
80 if node_id == SAM_ROOT_NODE_ID {
81 node_data.min_suf_len = 0;
82
83 node_data.data = Rope::new(Inner::default());
84 } else {
85 let parent_id = node.get_suffix_parent_id();
86 let parent = sam.get_node(parent_id).expect("invalid GeneralSam");
87
88 node_data.min_suf_len = parent.max_suffix_len() + 1;
89
90 assert_eq!(
91 node_data.data.len(),
92 node_data.max_suf_len - node_data.min_suf_len + 1
93 );
94
95 for (len, data) in f_init(node_id) {
96 assert!(len >= node_data.min_suf_len && len <= node_data.max_suf_len);
97 let (left, right) = node_data.data.split(len - node_data.min_suf_len);
98 let (_, right) = right.split(1);
99 node_data.data = left.merge(&Rope::new(data)).merge(&right);
100 }
101
102 assert_eq!(
103 node_data.data.len(),
104 node_data.max_suf_len - node_data.min_suf_len + 1
105 );
106 }
107
108 node.get_trans()
109 .transitions()
110 .copied()
111 .for_each(|target_id| {
112 res[target_id].data = res[target_id].data.merge(&res[node_id].data)
113 });
114 }
115 res
116 }
117}
118
119#[derive(Clone, Debug)]
120pub struct SuffixInTrie<Digested: Clone> {
121 pub digested_trie_node: Digested,
122 pub seq_len: usize,
123}
124
125pub type UntaggedSuffixData<Inner> = SuffixwiseData<RopeUntaggedInner<Inner>>;
126pub type SuffixInTrieData<D> = UntaggedSuffixData<Option<SuffixInTrie<D>>>;
127
128impl<Digested: Clone> SuffixInTrieData<Digested> {
129 pub fn build<
130 TransTable: TransitionTable,
131 TN: TrieNodeAlike<InnerType = TransTable::KeyType>,
132 F: FnMut(&TN) -> Digested,
133 >(
134 sam: &GeneralSam<TransTable>,
135 trie_node: TN,
136 mut f: F,
137 ) -> Vec<Self> {
138 let mut sam_to_data = vec![LinkedList::<SuffixInTrie<Digested>>::new(); sam.num_of_nodes()];
139 let callback =
140 |event: TravelEvent<(&GeneralSamState<_, &GeneralSam<_>>, &TN), _, _>| -> Result<_, Infallible> {
141 match event {
142 crate::TravelEvent::Pop((sam_state, trie_state), len) => {
143 if trie_state.is_accepting() {
144 sam_to_data[sam_state.node_id].push_back(SuffixInTrie {
145 digested_trie_node: f(trie_state),
146 seq_len: len,
147 });
148 }
149 Ok(len)
150 }
151 crate::TravelEvent::PushRoot(_) => Ok(0),
152 crate::TravelEvent::Push(_, len, _) => Ok(len + 1),
153 }
154 };
155 sam.get_root_state().bfs_along(trie_node, callback).unwrap();
156 Self::build_from_sam(sam, |node_id| {
157 sam_to_data[node_id]
158 .iter()
159 .map(|x| (x.seq_len, Some(x.clone()).into()))
160 })
161 }
162}