lance_core/container/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::LinkedList;
5
6use deepsize::DeepSizeOf;
7
8/// A linked list that grows exponentially. It is used to store a large number of
9/// elements in a memory-efficient way. The list grows by doubling the capacity of
10/// the last element when it's full, the capacity can be limited by the `limit`
11/// parameter. The default value is 0, which means no limit.
12#[derive(Debug, Clone, Default)]
13pub struct ExpLinkedList<T> {
14    inner: LinkedList<Vec<T>>,
15    len: usize,
16    // The maximum capacity of single node in the list.
17    // If the limit is 0, there is no limit.
18    // We use u16 to save memory because ExpLinkedList should not
19    // be used if the limit is that large.
20    limit: u16,
21}
22
23impl<T> ExpLinkedList<T> {
24    /// Creates a new empty `ExpLinkedList`.
25    pub fn new() -> Self {
26        Self {
27            inner: LinkedList::new(),
28            len: 0,
29            limit: 0,
30        }
31    }
32
33    pub fn with_capacity(capacity: usize) -> Self {
34        let mut inner = LinkedList::new();
35        inner.push_back(Vec::with_capacity(capacity));
36        Self {
37            inner,
38            len: 0,
39            limit: 0,
40        }
41    }
42
43    /// Creates a new `ExpLinkedList` with a specified capacity limit.
44    /// The limit is the maximum capacity of a single node in the list.
45    /// If the limit is 0, there is no limit.
46    pub fn with_capacity_limit(mut self, limit: u16) -> Self {
47        self.limit = limit;
48        self
49    }
50
51    /// Pushes a new element into the list. If the last element in the list
52    /// reaches its capacity, a new node is created with double capacity.
53    pub fn push(&mut self, v: T) {
54        match self.inner.back() {
55            Some(last) => {
56                if last.len() == last.capacity() {
57                    let new_cap = if self.inner.len() == 1 {
58                        last.capacity()
59                    } else if self.limit > 0 && last.capacity() * 2 >= self.limit as usize {
60                        self.limit as usize
61                    } else {
62                        last.capacity() * 2
63                    };
64                    self.inner.push_back(Vec::with_capacity(new_cap));
65                }
66            }
67            None => {
68                self.inner.push_back(Vec::with_capacity(2));
69            }
70        }
71        self.do_push(v);
72    }
73
74    fn do_push(&mut self, v: T) {
75        self.inner.back_mut().unwrap().push(v);
76        self.len += 1;
77    }
78
79    /// Removes the last element from the list.
80    pub fn pop(&mut self) -> Option<T> {
81        match self.inner.back_mut() {
82            Some(last) => {
83                if last.is_empty() {
84                    self.inner.pop_back();
85                    self.pop()
86                } else {
87                    self.len -= 1;
88                    last.pop()
89                }
90            }
91            None => None,
92        }
93    }
94
95    /// Clears the list, removing all elements.
96    /// This will free the memory used by the list.
97    pub fn clear(&mut self) {
98        self.inner.clear();
99        self.len = 0;
100    }
101
102    /// Returns the number of elements in the list.
103    pub fn len(&self) -> usize {
104        self.len
105    }
106
107    /// Returns whether the list is empty.
108    pub fn is_empty(&self) -> bool {
109        self.inner.is_empty()
110    }
111
112    /// Returns the size of list, including the size of the elements and the
113    /// size of the list itself, and the unused space.
114    /// The element size is calculated using `std::mem::size_of::<T>()`,
115    /// so it is not accurate for all types.
116    /// For example, for `String`, it will return the size of the pointer,
117    /// not the size of the string itself. For that you need to use `DeepSizeOf`.
118    pub fn size(&self) -> usize {
119        let unused_space = match self.inner.back() {
120            Some(last) => last.capacity() - last.len(),
121            None => 0,
122        };
123        (self.len() + unused_space) * std::mem::size_of::<T>()
124            + std::mem::size_of::<Self>()
125            + self.inner.len() * std::mem::size_of::<Vec<T>>()
126    }
127
128    /// Returns an iterator over the elements in the list.
129    pub fn iter(&self) -> ExpLinkedListIter<'_, T> {
130        ExpLinkedListIter::new(self)
131    }
132
133    pub fn block_iter(&self) -> impl Iterator<Item = &[T]> {
134        self.inner.iter().map(|v| v.as_slice())
135    }
136}
137
138impl<T: DeepSizeOf> DeepSizeOf for ExpLinkedList<T> {
139    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
140        self.inner
141            .iter()
142            .map(|v| v.deep_size_of_children(context))
143            .sum()
144    }
145}
146
147impl<T> FromIterator<T> for ExpLinkedList<T> {
148    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
149        let iter = iter.into_iter();
150        let size_hint = iter.size_hint().0;
151        let cap = if size_hint > 0 { size_hint } else { 1 };
152        let mut list = Self::with_capacity(cap);
153        for item in iter {
154            list.push(item);
155        }
156        list
157    }
158}
159
160pub struct ExpLinkedListIter<'a, T> {
161    inner: std::collections::linked_list::Iter<'a, Vec<T>>,
162    inner_iter: Option<std::slice::Iter<'a, T>>,
163    len: usize,
164}
165
166impl<'a, T> ExpLinkedListIter<'a, T> {
167    pub fn new(inner: &'a ExpLinkedList<T>) -> Self {
168        Self {
169            inner: inner.inner.iter(),
170            inner_iter: None,
171            len: inner.len(),
172        }
173    }
174}
175
176impl<'a, T> Iterator for ExpLinkedListIter<'a, T> {
177    type Item = &'a T;
178
179    fn next(&mut self) -> Option<Self::Item> {
180        if let Some(inner_iter) = &mut self.inner_iter {
181            if let Some(v) = inner_iter.next() {
182                return Some(v);
183            }
184        }
185        if let Some(inner) = self.inner.next() {
186            self.inner_iter = Some(inner.iter());
187            return self.next();
188        }
189        None
190    }
191
192    fn size_hint(&self) -> (usize, Option<usize>) {
193        (self.len, Some(self.len))
194    }
195}
196
197pub struct ExpLinkedListIntoIter<T> {
198    inner: std::collections::linked_list::IntoIter<Vec<T>>,
199    inner_iter: Option<std::vec::IntoIter<T>>,
200    len: usize,
201}
202
203impl<T> ExpLinkedListIntoIter<T> {
204    pub fn new(list: ExpLinkedList<T>) -> Self {
205        let len = list.len();
206        Self {
207            inner: list.inner.into_iter(),
208            inner_iter: None,
209            len,
210        }
211    }
212}
213
214impl<T> Iterator for ExpLinkedListIntoIter<T> {
215    type Item = T;
216
217    fn next(&mut self) -> Option<Self::Item> {
218        if let Some(inner_iter) = &mut self.inner_iter {
219            if let Some(v) = inner_iter.next() {
220                return Some(v);
221            }
222        }
223        if let Some(inner) = self.inner.next() {
224            self.inner_iter = Some(inner.into_iter());
225            return self.next();
226        }
227        None
228    }
229
230    fn size_hint(&self) -> (usize, Option<usize>) {
231        (self.len, Some(self.len))
232    }
233}
234
235impl<T> IntoIterator for ExpLinkedList<T> {
236    type Item = T;
237    type IntoIter = ExpLinkedListIntoIter<T>;
238
239    fn into_iter(self) -> Self::IntoIter {
240        ExpLinkedListIntoIter::new(self)
241    }
242}
243
244#[cfg(test)]
245mod tests {
246    use super::*;
247
248    fn test_exp_linked_list(list: &mut ExpLinkedList<usize>) {
249        assert_eq!(list.len(), 100);
250        assert!(!list.is_empty());
251
252        // removes the last 50 elements
253        for i in 0..50 {
254            assert_eq!(list.pop(), Some(99 - i));
255        }
256        assert_eq!(list.len(), 50);
257        assert!(!list.is_empty());
258
259        // iterate over the list
260        for (i, v) in list.iter().enumerate() {
261            assert_eq!(*v, i);
262        }
263
264        // clear the list
265        list.clear();
266        assert_eq!(list.len(), 0);
267        assert!(list.is_empty());
268        assert_eq!(list.pop(), None);
269    }
270
271    #[test]
272    fn test_exp_linked_list_basic() {
273        let mut list = ExpLinkedList::new();
274        for i in 0..100 {
275            list.push(i);
276            assert_eq!(list.len(), i + 1);
277        }
278        test_exp_linked_list(&mut list);
279    }
280
281    #[test]
282    fn test_exp_linked_list_from() {
283        let mut list = (0..100).collect();
284        test_exp_linked_list(&mut list);
285    }
286
287    #[test]
288    fn test_exp_linked_list_with_capacity_limit() {
289        let mut list = ExpLinkedList::new().with_capacity_limit(10);
290        for i in 0..100 {
291            list.push(i);
292            assert_eq!(list.len(), i + 1);
293        }
294        assert_eq!(list.inner.back().unwrap().capacity(), 10);
295        test_exp_linked_list(&mut list);
296    }
297}