lance_core/container/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::collections::LinkedList;
5
6use deepsize::DeepSizeOf;
7
8/// A linked list that grows exponentially. It is used to store a large number of
9/// elements in a memory-efficient way. The list grows by doubling the capacity of
10/// the last element when it's full, the capacity can be limited by the `limit`
11/// parameter. The default value is 0, which means no limit.
12#[derive(Debug, Clone, Default)]
13pub struct ExpLinkedList<T> {
14    inner: LinkedList<Vec<T>>,
15    len: usize,
16    // The maximum capacity of single node in the list.
17    // If the limit is 0, there is no limit.
18    // We use u16 to save memory because ExpLinkedList should not
19    // be used if the limit is that large.
20    limit: u16,
21}
22
23impl<T> ExpLinkedList<T> {
24    /// Creates a new empty `ExpLinkedList`.
25    pub fn new() -> Self {
26        Self {
27            inner: LinkedList::new(),
28            len: 0,
29            limit: 0,
30        }
31    }
32
33    pub fn with_capacity(capacity: usize) -> Self {
34        let mut inner = LinkedList::new();
35        inner.push_back(Vec::with_capacity(capacity));
36        Self {
37            inner,
38            len: 0,
39            limit: 0,
40        }
41    }
42
43    /// Creates a new `ExpLinkedList` with a specified capacity limit.
44    /// The limit is the maximum capacity of a single node in the list.
45    /// If the limit is 0, there is no limit.
46    pub fn with_capacity_limit(limit: u16) -> Self {
47        Self {
48            inner: LinkedList::new(),
49            len: 0,
50            limit,
51        }
52    }
53
54    /// Pushes a new element into the list. If the last element in the list
55    /// reaches its capacity, a new node is created with double capacity.
56    pub fn push(&mut self, v: T) {
57        match self.inner.back() {
58            Some(last) => {
59                if last.len() == last.capacity() {
60                    let new_cap = if self.limit > 0 && last.capacity() * 2 >= self.limit as usize {
61                        self.limit as usize
62                    } else {
63                        last.capacity() * 2
64                    };
65                    self.inner.push_back(Vec::with_capacity(new_cap));
66                }
67            }
68            None => {
69                self.inner.push_back(Vec::with_capacity(1));
70            }
71        }
72        self.do_push(v);
73    }
74
75    fn do_push(&mut self, v: T) {
76        self.inner.back_mut().unwrap().push(v);
77        self.len += 1;
78    }
79
80    /// Removes the last element from the list.
81    pub fn pop(&mut self) -> Option<T> {
82        match self.inner.back_mut() {
83            Some(last) => {
84                if last.is_empty() {
85                    self.inner.pop_back();
86                    self.pop()
87                } else {
88                    self.len -= 1;
89                    last.pop()
90                }
91            }
92            None => None,
93        }
94    }
95
96    /// Clears the list, removing all elements.
97    /// This will free the memory used by the list.
98    pub fn clear(&mut self) {
99        self.inner.clear();
100        self.len = 0;
101    }
102
103    /// Returns the number of elements in the list.
104    pub fn len(&self) -> usize {
105        self.len
106    }
107
108    /// Returns whether the list is empty.
109    pub fn is_empty(&self) -> bool {
110        self.inner.is_empty()
111    }
112
113    /// Returns the size of list, including the size of the elements and the
114    /// size of the list itself, and the unused space.
115    /// The element size is calculated using `std::mem::size_of::<T>()`,
116    /// so it is not accurate for all types.
117    /// For example, for `String`, it will return the size of the pointer,
118    /// not the size of the string itself. For that you need to use `DeepSizeOf`.
119    pub fn size(&self) -> usize {
120        let unused_space = match self.inner.back() {
121            Some(last) => last.capacity() - last.len(),
122            None => 0,
123        };
124        (self.len() + unused_space) * std::mem::size_of::<T>()
125            + std::mem::size_of::<Self>()
126            + self.inner.len() * std::mem::size_of::<Vec<T>>()
127    }
128
129    /// Returns an iterator over the elements in the list.
130    pub fn iter(&self) -> ExpLinkedListIter<'_, T> {
131        ExpLinkedListIter::new(self)
132    }
133}
134
135impl<T: DeepSizeOf> DeepSizeOf for ExpLinkedList<T> {
136    fn deep_size_of_children(&self, context: &mut deepsize::Context) -> usize {
137        self.inner
138            .iter()
139            .map(|v| v.deep_size_of_children(context))
140            .sum()
141    }
142}
143
144impl<T> FromIterator<T> for ExpLinkedList<T> {
145    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
146        let iter = iter.into_iter();
147        let size_hint = iter.size_hint().0;
148        let cap = if size_hint > 0 { size_hint } else { 1 };
149        let mut list = Self::with_capacity(cap);
150        for item in iter {
151            list.push(item);
152        }
153        list
154    }
155}
156
157pub struct ExpLinkedListIter<'a, T> {
158    inner: std::collections::linked_list::Iter<'a, Vec<T>>,
159    inner_iter: Option<std::slice::Iter<'a, T>>,
160}
161
162impl<'a, T> ExpLinkedListIter<'a, T> {
163    pub fn new(inner: &'a ExpLinkedList<T>) -> Self {
164        Self {
165            inner: inner.inner.iter(),
166            inner_iter: None,
167        }
168    }
169}
170
171impl<'a, T> Iterator for ExpLinkedListIter<'a, T> {
172    type Item = &'a T;
173
174    fn next(&mut self) -> Option<Self::Item> {
175        if let Some(inner_iter) = &mut self.inner_iter {
176            if let Some(v) = inner_iter.next() {
177                return Some(v);
178            }
179        }
180        if let Some(inner) = self.inner.next() {
181            self.inner_iter = Some(inner.iter());
182            return self.next();
183        }
184        None
185    }
186}
187
188pub struct ExpLinkedListIntoIter<T> {
189    inner: std::collections::linked_list::IntoIter<Vec<T>>,
190    inner_iter: Option<std::vec::IntoIter<T>>,
191    len: usize,
192}
193
194impl<T> ExpLinkedListIntoIter<T> {
195    pub fn new(list: ExpLinkedList<T>) -> Self {
196        let len = list.len();
197        Self {
198            inner: list.inner.into_iter(),
199            inner_iter: None,
200            len,
201        }
202    }
203}
204
205impl<T> Iterator for ExpLinkedListIntoIter<T> {
206    type Item = T;
207
208    fn next(&mut self) -> Option<Self::Item> {
209        if let Some(inner_iter) = &mut self.inner_iter {
210            if let Some(v) = inner_iter.next() {
211                return Some(v);
212            }
213        }
214        if let Some(inner) = self.inner.next() {
215            self.inner_iter = Some(inner.into_iter());
216            return self.next();
217        }
218        None
219    }
220
221    fn size_hint(&self) -> (usize, Option<usize>) {
222        (self.len, Some(self.len))
223    }
224}
225
226impl<T> IntoIterator for ExpLinkedList<T> {
227    type Item = T;
228    type IntoIter = ExpLinkedListIntoIter<T>;
229
230    fn into_iter(self) -> Self::IntoIter {
231        ExpLinkedListIntoIter::new(self)
232    }
233}
234
235#[cfg(test)]
236mod tests {
237    use super::*;
238
239    fn test_exp_linked_list(list: &mut ExpLinkedList<usize>) {
240        assert_eq!(list.len(), 100);
241        assert!(!list.is_empty());
242
243        // removes the last 50 elements
244        for i in 0..50 {
245            assert_eq!(list.pop(), Some(99 - i));
246        }
247        assert_eq!(list.len(), 50);
248        assert!(!list.is_empty());
249
250        // iterate over the list
251        for (i, v) in list.iter().enumerate() {
252            assert_eq!(*v, i);
253        }
254
255        // clear the list
256        list.clear();
257        assert_eq!(list.len(), 0);
258        assert!(list.is_empty());
259        assert_eq!(list.pop(), None);
260    }
261
262    #[test]
263    fn test_exp_linked_list_basic() {
264        let mut list = ExpLinkedList::new();
265        for i in 0..100 {
266            list.push(i);
267            assert_eq!(list.len(), i + 1);
268        }
269        test_exp_linked_list(&mut list);
270    }
271
272    #[test]
273    fn test_exp_linked_list_from() {
274        let mut list = (0..100).collect();
275        test_exp_linked_list(&mut list);
276    }
277
278    #[test]
279    fn test_exp_linked_list_with_capacity_limit() {
280        let mut list = ExpLinkedList::with_capacity_limit(10);
281        for i in 0..100 {
282            list.push(i);
283            assert_eq!(list.len(), i + 1);
284        }
285        assert_eq!(list.inner.back().unwrap().capacity(), 10);
286        test_exp_linked_list(&mut list);
287    }
288}