general_sam/utils/
suffixwise.rs

1//! Utilities to store suffix-wise data in a suffix automaton.
2
3use std::{collections::LinkedList, convert::Infallible, ops::Deref};
4
5use crate::{
6    GeneralSam, GeneralSamState, SAM_NIL_NODE_ID, SAM_ROOT_NODE_ID, TransitionTable, TravelEvent,
7    TrieNodeAlike,
8    rope::{Rope, RopeBase, RopeData, RopeUntaggedInner, TreapBasedRopeBase},
9};
10
11#[derive(Clone, Default, Debug)]
12pub struct SuffixwiseData<Inner: RopeData + Default> {
13    data: Rope<Inner>,
14    min_suf_len: usize,
15    max_suf_len: usize,
16}
17
18impl<Inner: RopeData + Default> SuffixwiseData<Inner> {
19    pub fn get_rope(&self) -> &Rope<Inner> {
20        &self.data
21    }
22
23    pub fn get_min_suf_len(&self) -> usize {
24        self.min_suf_len
25    }
26
27    pub fn get_max_suf_len(&self) -> usize {
28        self.max_suf_len
29    }
30
31    pub fn map<NewInner: RopeData + Default, F: FnOnce(&Rope<Inner>) -> Rope<NewInner>>(
32        &self,
33        f: F,
34    ) -> SuffixwiseData<NewInner> {
35        SuffixwiseData {
36            data: f(&self.data),
37            min_suf_len: self.min_suf_len,
38            max_suf_len: self.max_suf_len,
39        }
40    }
41
42    pub fn get(&self, suf_len: usize) -> Option<Inner> {
43        if self.data.is_empty()
44            || self.max_suf_len == 0
45            || self.min_suf_len == 0
46            || suf_len < self.min_suf_len
47            || suf_len > self.max_suf_len
48        {
49            return None;
50        }
51        Some(
52            self.data
53                .query(suf_len - self.min_suf_len)
54                .expect("invalid suffixwise data")
55                .as_ref()
56                .deref()
57                .to_owned(),
58        )
59    }
60
61    pub fn build_from_sam<
62        TransTable: TransitionTable,
63        Iter: IntoIterator<Item = (usize, Inner)>,
64        FInit: FnMut(usize) -> Iter,
65    >(
66        sam: &GeneralSam<TransTable>,
67        mut f_init: FInit,
68    ) -> Vec<Self> {
69        let mut res = vec![Self::default(); sam.num_of_nodes()];
70        for node_id in sam.get_topo_and_suf_len_sorted_node_ids().iter().copied() {
71            assert_ne!(node_id, SAM_NIL_NODE_ID);
72
73            let node = sam.get_node(node_id).expect("invalid GeneralSam");
74            let node_data = res
75                .get_mut(node_id)
76                .unwrap_or_else(|| panic!("invalid node id: {}", node_id));
77
78            node_data.max_suf_len = node.max_suffix_len();
79
80            if node_id == SAM_ROOT_NODE_ID {
81                node_data.min_suf_len = 0;
82
83                node_data.data = Rope::new(Inner::default());
84            } else {
85                let parent_id = node.get_suffix_parent_id();
86                let parent = sam.get_node(parent_id).expect("invalid GeneralSam");
87
88                node_data.min_suf_len = parent.max_suffix_len() + 1;
89
90                assert_eq!(
91                    node_data.data.len(),
92                    node_data.max_suf_len - node_data.min_suf_len + 1
93                );
94
95                for (len, data) in f_init(node_id) {
96                    assert!(len >= node_data.min_suf_len && len <= node_data.max_suf_len);
97                    let (left, right) = node_data.data.split(len - node_data.min_suf_len);
98                    let (_, right) = right.split(1);
99                    node_data.data = left.merge(&Rope::new(data)).merge(&right);
100                }
101
102                assert_eq!(
103                    node_data.data.len(),
104                    node_data.max_suf_len - node_data.min_suf_len + 1
105                );
106            }
107
108            node.get_trans()
109                .transitions()
110                .copied()
111                .for_each(|target_id| {
112                    res[target_id].data = res[target_id].data.merge(&res[node_id].data)
113                });
114        }
115        res
116    }
117}
118
119#[derive(Clone, Debug)]
120pub struct SuffixInTrie<Digested: Clone> {
121    pub digested_trie_node: Digested,
122    pub seq_len: usize,
123}
124
125pub type UntaggedSuffixData<Inner> = SuffixwiseData<RopeUntaggedInner<Inner>>;
126pub type SuffixInTrieData<D> = UntaggedSuffixData<Option<SuffixInTrie<D>>>;
127
128impl<Digested: Clone> SuffixInTrieData<Digested> {
129    pub fn build<
130        TransTable: TransitionTable,
131        TN: TrieNodeAlike<InnerType = TransTable::KeyType>,
132        F: FnMut(&TN) -> Digested,
133    >(
134        sam: &GeneralSam<TransTable>,
135        trie_node: TN,
136        mut f: F,
137    ) -> Vec<Self> {
138        let mut sam_to_data = vec![LinkedList::<SuffixInTrie<Digested>>::new(); sam.num_of_nodes()];
139        let callback =
140            |event: TravelEvent<(&GeneralSamState<_, &GeneralSam<_>>, &TN), _, _>| -> Result<_, Infallible> {
141                match event {
142                    crate::TravelEvent::Pop((sam_state, trie_state), len) => {
143                        if trie_state.is_accepting() {
144                            sam_to_data[sam_state.node_id].push_back(SuffixInTrie {
145                                digested_trie_node: f(trie_state),
146                                seq_len: len,
147                            });
148                        }
149                        Ok(len)
150                    }
151                    crate::TravelEvent::PushRoot(_) => Ok(0),
152                    crate::TravelEvent::Push(_, len, _) => Ok(len + 1),
153                }
154            };
155        sam.get_root_state().bfs_along(trie_node, callback).unwrap();
156        Self::build_from_sam(sam, |node_id| {
157            sam_to_data[node_id]
158                .iter()
159                .map(|x| (x.seq_len, Some(x.clone()).into()))
160        })
161    }
162}