jieba_rs/
sparse_dag.rs

1use crate::FxHashMap as HashMap;
2
3pub(crate) struct StaticSparseDAG {
4    array: Vec<usize>,
5    start_pos: HashMap<usize, usize>,
6    size_hint_for_iterator: usize,
7    curr_insertion_len: usize,
8}
9
10pub struct EdgeIter<'a> {
11    dag: &'a StaticSparseDAG,
12    cursor: usize,
13}
14
15impl Iterator for EdgeIter<'_> {
16    type Item = usize;
17
18    fn size_hint(&self) -> (usize, Option<usize>) {
19        (0, Some(self.dag.size_hint_for_iterator))
20    }
21
22    fn next(&mut self) -> Option<Self::Item> {
23        if self.dag.array[self.cursor] == 0 {
24            self.cursor += 1;
25            None
26        } else {
27            let v = self.dag.array[self.cursor] - 1;
28            self.cursor += 1;
29            Some(v)
30        }
31    }
32}
33
34impl StaticSparseDAG {
35    pub(crate) fn with_size_hint(hint: usize) -> Self {
36        // Cap the allocation to prevent memory issues with very large inputs
37        // Use a more conservative multiplier to reduce memory overhead
38        const MAX_CAPACITY: usize = 4_000_000;
39        const MULTIPLIER: usize = 4; // Reduced from 5 to 4
40        const MIN_CAPACITY: usize = 32; // Minimum useful capacity
41
42        let capacity = std::cmp::max(std::cmp::min(hint * MULTIPLIER, MAX_CAPACITY), MIN_CAPACITY);
43
44        StaticSparseDAG {
45            array: Vec::with_capacity(capacity),
46            start_pos: HashMap::default(),
47            size_hint_for_iterator: 0,
48            curr_insertion_len: 0,
49        }
50    }
51
52    #[inline]
53    pub(crate) fn start(&mut self, from: usize) {
54        let idx = self.array.len();
55        self.curr_insertion_len = 0;
56        self.start_pos.insert(from, idx);
57    }
58
59    #[inline]
60    pub(crate) fn insert(&mut self, to: usize) {
61        self.curr_insertion_len += 1;
62        self.array.push(to + 1);
63    }
64
65    #[inline]
66    pub(crate) fn commit(&mut self) {
67        self.size_hint_for_iterator = std::cmp::max(self.curr_insertion_len, self.size_hint_for_iterator);
68        self.array.push(0);
69    }
70
71    #[inline]
72    pub(crate) fn iter_edges(&self, from: usize) -> EdgeIter<'_> {
73        let cursor = self.start_pos.get(&from).unwrap().to_owned();
74
75        EdgeIter { dag: self, cursor }
76    }
77
78    pub(crate) fn clear(&mut self) {
79        self.array.clear();
80        self.start_pos.clear();
81    }
82}
83
84#[cfg(test)]
85mod tests {
86    use super::*;
87
88    #[test]
89    fn test_static_sparse_dag() {
90        let mut dag = StaticSparseDAG::with_size_hint(5);
91        let mut ans: Vec<Vec<usize>> = vec![Vec::new(); 5];
92        for i in 0..=3 {
93            dag.start(i);
94            for j in (i + 1)..=4 {
95                ans[i].push(j);
96                dag.insert(j);
97            }
98
99            dag.commit()
100        }
101
102        assert_eq!(dag.size_hint_for_iterator, 4);
103
104        for i in 0..=3 {
105            let edges: Vec<usize> = dag.iter_edges(i).collect();
106            assert_eq!(ans[i], edges);
107        }
108    }
109}