lance_core/container/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::LinkedList;
5
6use deepsize::DeepSizeOf;
7
8/// A linked list that grows exponentially. It is used to store a large number of
9/// elements in a memory-efficient way. The list grows by doubling the capacity of
10/// the last element when it's full, the capacity can be limited by the `limit`
11/// parameter. The default value is 0, which means no limit.
12#[derive(Debug, Clone, Default)]
13pub struct ExpLinkedList<T> {
14    inner: LinkedList<Vec<T>>,
15    len: usize,
16    // The maximum capacity of single node in the list.
17    // If the limit is 0, there is no limit.
18    // We use u16 to save memory because ExpLinkedList should not
19    // be used if the limit is that large.
20    limit: u16,
21}
22
23impl<T> ExpLinkedList<T> {
24    /// Creates a new empty `ExpLinkedList`.
25    pub fn new() -> Self {
26        Self {
27            inner: LinkedList::new(),
28            len: 0,
29            limit: 0,
30        }
31    }
32
33    pub fn with_capacity(capacity: usize) -> Self {
34        let mut inner = LinkedList::new();
35        inner.push_back(Vec::with_capacity(capacity));
36        Self {
37            inner,
38            len: 0,
39            limit: 0,
40        }
41    }
42
43    /// Creates a new `ExpLinkedList` with a specified capacity limit.
44    /// The limit is the maximum capacity of a single node in the list.
45    /// If the limit is 0, there is no limit.
46    pub fn with_capacity_limit(mut self, limit: u16) -> Self {
47        self.limit = limit;
48        self
49    }
50
51    /// Pushes a new element into the list. If the last element in the list
52    /// reaches its capacity, a new node is created with double capacity.
53    pub fn push(&mut self, v: T) {
54        match self.inner.back() {
55            Some(last) => {
56                if last.len() == last.capacity() {
57                    let new_cap = if self.limit > 0 && last.capacity() * 2 >= self.limit as usize {
58                        self.limit as usize
59                    } else {
60                        last.capacity() * 2
61                    };
62                    self.inner.push_back(Vec::with_capacity(new_cap));
63                }
64            }
65            None => {
66                self.inner.push_back(Vec::with_capacity(1));
67            }
68        }
69        self.do_push(v);
70    }
71
72    fn do_push(&mut self, v: T) {
73        self.inner.back_mut().unwrap().push(v);
74        self.len += 1;
75    }
76
77    /// Removes the last element from the list.
78    pub fn pop(&mut self) -> Option<T> {
79        match self.inner.back_mut() {
80            Some(last) => {
81                if last.is_empty() {
82                    self.inner.pop_back();
83                    self.pop()
84                } else {
85                    self.len -= 1;
86                    last.pop()
87                }
88            }
89            None => None,
90        }
91    }
92
93    /// Clears the list, removing all elements.
94    /// This will free the memory used by the list.
95    pub fn clear(&mut self) {
96        self.inner.clear();
97        self.len = 0;
98    }
99
100    /// Returns the number of elements in the list.
101    pub fn len(&self) -> usize {
102        self.len
103    }
104
105    /// Returns whether the list is empty.
106    pub fn is_empty(&self) -> bool {
107        self.inner.is_empty()
108    }
109
110    /// Returns the size of list, including the size of the elements and the
111    /// size of the list itself, and the unused space.
112    /// The element size is calculated using `std::mem::size_of::<T>()`,
113    /// so it is not accurate for all types.
114    /// For example, for `String`, it will return the size of the pointer,
115    /// not the size of the string itself. For that you need to use `DeepSizeOf`.
116    pub fn size(&self) -> usize {
117        let unused_space = match self.inner.back() {
118            Some(last) => last.capacity() - last.len(),
119            None => 0,
120        };
121        (self.len() + unused_space) * std::mem::size_of::<T>()
122            + std::mem::size_of::<Self>()
123            + self.inner.len() * std::mem::size_of::<Vec<T>>()
124    }
125
126    /// Returns an iterator over the elements in the list.
127    pub fn iter(&self) -> ExpLinkedListIter<'_, T> {
128        ExpLinkedListIter::new(self)
129    }
130
131    pub fn block_iter(&self) -> impl Iterator<Item = &[T]> {
132        self.inner.iter().map(|v| v.as_slice())
133    }
134}
135
136impl<T: DeepSizeOf> DeepSizeOf for ExpLinkedList<T> {
137    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
138        self.inner
139            .iter()
140            .map(|v| v.deep_size_of_children(context))
141            .sum()
142    }
143}
144
145impl<T> FromIterator<T> for ExpLinkedList<T> {
146    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
147        let iter = iter.into_iter();
148        let size_hint = iter.size_hint().0;
149        let cap = if size_hint > 0 { size_hint } else { 1 };
150        let mut list = Self::with_capacity(cap);
151        for item in iter {
152            list.push(item);
153        }
154        list
155    }
156}
157
158impl<T> PartialEq for ExpLinkedList<T>
159where
160    T: PartialEq,
161{
162    fn eq(&self, other: &Self) -> bool {
163        self.iter().zip(other.iter()).all(|(a, b)| a == b)
164    }
165}
166
167impl<T> Eq for ExpLinkedList<T> where T: Eq {}
168
169pub struct ExpLinkedListIter<'a, T> {
170    inner: std::collections::linked_list::Iter<'a, Vec<T>>,
171    inner_iter: Option<std::slice::Iter<'a, T>>,
172    len: usize,
173}
174
175impl<'a, T> ExpLinkedListIter<'a, T> {
176    pub fn new(inner: &'a ExpLinkedList<T>) -> Self {
177        Self {
178            inner: inner.inner.iter(),
179            inner_iter: None,
180            len: inner.len(),
181        }
182    }
183}
184
185impl<'a, T> Iterator for ExpLinkedListIter<'a, T> {
186    type Item = &'a T;
187
188    fn next(&mut self) -> Option<Self::Item> {
189        if let Some(inner_iter) = &mut self.inner_iter {
190            if let Some(v) = inner_iter.next() {
191                return Some(v);
192            }
193        }
194        if let Some(inner) = self.inner.next() {
195            self.inner_iter = Some(inner.iter());
196            return self.next();
197        }
198        None
199    }
200
201    fn size_hint(&self) -> (usize, Option<usize>) {
202        (self.len, Some(self.len))
203    }
204}
205
206pub struct ExpLinkedListIntoIter<T> {
207    inner: std::collections::linked_list::IntoIter<Vec<T>>,
208    inner_iter: Option<std::vec::IntoIter<T>>,
209    len: usize,
210}
211
212impl<T> ExpLinkedListIntoIter<T> {
213    pub fn new(list: ExpLinkedList<T>) -> Self {
214        let len = list.len();
215        Self {
216            inner: list.inner.into_iter(),
217            inner_iter: None,
218            len,
219        }
220    }
221}
222
223impl<T> Iterator for ExpLinkedListIntoIter<T> {
224    type Item = T;
225
226    fn next(&mut self) -> Option<Self::Item> {
227        if let Some(inner_iter) = &mut self.inner_iter {
228            if let Some(v) = inner_iter.next() {
229                return Some(v);
230            }
231        }
232        if let Some(inner) = self.inner.next() {
233            self.inner_iter = Some(inner.into_iter());
234            return self.next();
235        }
236        None
237    }
238
239    fn size_hint(&self) -> (usize, Option<usize>) {
240        (self.len, Some(self.len))
241    }
242}
243
244impl<T> IntoIterator for ExpLinkedList<T> {
245    type Item = T;
246    type IntoIter = ExpLinkedListIntoIter<T>;
247
248    fn into_iter(self) -> Self::IntoIter {
249        ExpLinkedListIntoIter::new(self)
250    }
251}
252
253#[cfg(test)]
254mod tests {
255    use super::*;
256
257    fn test_exp_linked_list(list: &mut ExpLinkedList<usize>) {
258        assert_eq!(list.len(), 100);
259        assert!(!list.is_empty());
260
261        // removes the last 50 elements
262        for i in 0..50 {
263            assert_eq!(list.pop(), Some(99 - i));
264        }
265        assert_eq!(list.len(), 50);
266        assert!(!list.is_empty());
267
268        // iterate over the list
269        for (i, v) in list.iter().enumerate() {
270            assert_eq!(*v, i);
271        }
272
273        // clear the list
274        list.clear();
275        assert_eq!(list.len(), 0);
276        assert!(list.is_empty());
277        assert_eq!(list.pop(), None);
278    }
279
280    #[test]
281    fn test_exp_linked_list_basic() {
282        let mut list = ExpLinkedList::new();
283        for i in 0..100 {
284            list.push(i);
285            assert_eq!(list.len(), i + 1);
286        }
287        test_exp_linked_list(&mut list);
288    }
289
290    #[test]
291    fn test_exp_linked_list_from() {
292        let mut list = (0..100).collect();
293        test_exp_linked_list(&mut list);
294    }
295
296    #[test]
297    fn test_exp_linked_list_with_capacity_limit() {
298        let mut list = ExpLinkedList::new().with_capacity_limit(10);
299        for i in 0..100 {
300            list.push(i);
301            assert_eq!(list.len(), i + 1);
302        }
303        assert_eq!(list.inner.back().unwrap().capacity(), 10);
304        test_exp_linked_list(&mut list);
305    }
306}