pingora_lru/
linked_list.rs

1// Copyright 2024 Cloudflare, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Can't tell people you know Rust until you write a (doubly) linked list
16
17//! Doubly linked list
18//!
19//! Features
20//! - Preallocate consecutive memory, no memory fragmentation.
21//! - No shrink function: for Lru cache that grows to a certain size but never shrinks.
22//! - Relatively fast and efficient.
23
24// inspired by clru::FixedSizeList (Élie!)
25
26use std::mem::replace;
27
28type Index = usize;
29const NULL: Index = usize::MAX;
30const HEAD: Index = 0;
31const TAIL: Index = 1;
32const OFFSET: usize = 2;
33
34#[derive(Debug)]
35struct Node {
36    pub(crate) prev: Index,
37    pub(crate) next: Index,
38    pub(crate) data: u64,
39}
40
41// Functionally the same as vec![head, tail, data_nodes...] where head & tail are fixed and
42// the rest data nodes can expand. Both head and tail can be accessed faster than using index
43struct Nodes {
44    // we use these sentinel nodes to guard the head and tail of the list so that list
45    // manipulation is simpler (fewer if-else)
46    head: Node,
47    tail: Node,
48    data_nodes: Vec<Node>,
49}
50
51impl Nodes {
52    fn with_capacity(capacity: usize) -> Self {
53        Nodes {
54            head: Node {
55                prev: NULL,
56                next: TAIL,
57                data: 0,
58            },
59            tail: Node {
60                prev: HEAD,
61                next: NULL,
62                data: 0,
63            },
64            data_nodes: Vec::with_capacity(capacity),
65        }
66    }
67
68    fn new_node(&mut self, data: u64) -> Index {
69        const VEC_EXP_GROWTH_CAP: usize = 65536;
70        let node = Node {
71            prev: NULL,
72            next: NULL,
73            data,
74        };
75        // Constrain the growth of vec: vec always double its capacity when it needs to grow.
76        // It could waste too much memory when it is already very large.
77        // Here we limit the memory waste to 10% once it grows beyond the cap.
78        // The amortized growth cost is O(n) beyond the max of the initially reserved capacity and
79        // the cap. But this list is for limited sized LRU and we recycle released node, so
80        // hopefully insertions are rare beyond certain sizes
81        if self.data_nodes.capacity() > VEC_EXP_GROWTH_CAP
82            && self.data_nodes.capacity() - self.data_nodes.len() < 2
83        {
84            self.data_nodes
85                .reserve_exact(self.data_nodes.capacity() / 10)
86        }
87        self.data_nodes.push(node);
88        self.data_nodes.len() - 1 + OFFSET
89    }
90
91    fn len(&self) -> usize {
92        self.data_nodes.len()
93    }
94
95    fn head(&self) -> &Node {
96        &self.head
97    }
98
99    fn tail(&self) -> &Node {
100        &self.tail
101    }
102}
103
104impl std::ops::Index<usize> for Nodes {
105    type Output = Node;
106
107    fn index(&self, index: usize) -> &Self::Output {
108        match index {
109            HEAD => &self.head,
110            TAIL => &self.tail,
111            _ => &self.data_nodes[index - OFFSET],
112        }
113    }
114}
115
116impl std::ops::IndexMut<usize> for Nodes {
117    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
118        match index {
119            HEAD => &mut self.head,
120            TAIL => &mut self.tail,
121            _ => &mut self.data_nodes[index - OFFSET],
122        }
123    }
124}
125
126/// Doubly linked list
127pub struct LinkedList {
128    nodes: Nodes,
129    free: Vec<Index>, // to keep track of freed node to be used again
130}
131// Panic when index used as parameters are invalid
132// Index returned by push_* is always valid.
133impl LinkedList {
134    /// Create a [LinkedList] with the given predicted capacity.
135    pub fn with_capacity(capacity: usize) -> Self {
136        LinkedList {
137            nodes: Nodes::with_capacity(capacity),
138            free: vec![],
139        }
140    }
141
142    // Allocate a new node and return its index
143    // NOTE: this node is leaked if not used by caller
144    fn new_node(&mut self, data: u64) -> Index {
145        if let Some(index) = self.free.pop() {
146            // have a free node, update its payload and return its index
147            self.nodes[index].data = data;
148            index
149        } else {
150            // create a new node
151            self.nodes.new_node(data)
152        }
153    }
154
155    /// How many nodes in the list
156    #[allow(clippy::len_without_is_empty)]
157    pub fn len(&self) -> usize {
158        // exclude the 2 sentinels
159        self.nodes.len() - self.free.len()
160    }
161
162    fn valid_index(&self, index: Index) -> bool {
163        index != HEAD && index != TAIL && index < self.nodes.len() + OFFSET
164        // TODO: check node prev/next not NULL
165        // TODO: debug_check index not in self.free
166    }
167
168    fn node(&self, index: Index) -> Option<&Node> {
169        if self.valid_index(index) {
170            Some(&self.nodes[index])
171        } else {
172            None
173        }
174    }
175
176    /// Peek into the list
177    pub fn peek(&self, index: Index) -> Option<u64> {
178        self.node(index).map(|n| n.data)
179    }
180
181    // safe because the index still needs to be in the range of the vec
182    fn peek_unchecked(&self, index: Index) -> &u64 {
183        &self.nodes[index].data
184    }
185
186    /// Whether the value exists closed (up to search_limit nodes) to the head of the list
187    // It can be done via iter().take().find() but this is cheaper
188    pub fn exist_near_head(&self, value: u64, search_limit: usize) -> bool {
189        let mut current_node = HEAD;
190        for _ in 0..search_limit {
191            current_node = self.nodes[current_node].next;
192            if current_node == TAIL {
193                return false;
194            }
195            if self.nodes[current_node].data == value {
196                return true;
197            }
198        }
199        false
200    }
201
202    // put a node right after the node at `at`
203    fn insert_after(&mut self, node_index: Index, at: Index) {
204        assert!(at != TAIL && at != node_index); // can't insert after tail or to itself
205
206        let next = replace(&mut self.nodes[at].next, node_index);
207
208        let node = &mut self.nodes[node_index];
209        node.next = next;
210        node.prev = at;
211
212        self.nodes[next].prev = node_index;
213    }
214
215    /// Put the data at the head of the list.
216    pub fn push_head(&mut self, data: u64) -> Index {
217        let new_node_index = self.new_node(data);
218        self.insert_after(new_node_index, HEAD);
219        new_node_index
220    }
221
222    /// Put the data at the tail of the list.
223    pub fn push_tail(&mut self, data: u64) -> Index {
224        let new_node_index = self.new_node(data);
225        self.insert_after(new_node_index, self.nodes.tail().prev);
226        new_node_index
227    }
228
229    // lift the node out of the linked list, to either delete it or insert to another place
230    // NOTE: the node is leaked if not used by the caller
231    fn lift(&mut self, index: Index) -> u64 {
232        // can't touch the sentinels
233        assert!(index != HEAD && index != TAIL);
234
235        let node = &mut self.nodes[index];
236
237        // zero out the pointers, useful in case we try to access a freed node
238        let prev = replace(&mut node.prev, NULL);
239        let next = replace(&mut node.next, NULL);
240        let data = node.data;
241
242        // make sure we are accessing a node in the list, not freed already
243        assert!(prev != NULL && next != NULL);
244
245        self.nodes[prev].next = next;
246        self.nodes[next].prev = prev;
247
248        data
249    }
250
251    /// Remove the node at the index, and return the value
252    pub fn remove(&mut self, index: Index) -> u64 {
253        self.free.push(index);
254        self.lift(index)
255    }
256
257    /// Remove the tail of the list
258    pub fn pop_tail(&mut self) -> Option<u64> {
259        let data_tail = self.nodes.tail().prev;
260        if data_tail == HEAD {
261            None // empty list
262        } else {
263            Some(self.remove(data_tail))
264        }
265    }
266
267    /// Put the node at the index to the head
268    pub fn promote(&mut self, index: Index) {
269        if self.nodes.head().next == index {
270            return; // already head
271        }
272        self.lift(index);
273        self.insert_after(index, HEAD);
274    }
275
276    fn next(&self, index: Index) -> Index {
277        self.nodes[index].next
278    }
279
280    fn prev(&self, index: Index) -> Index {
281        self.nodes[index].prev
282    }
283
284    /// Get the head of the list
285    pub fn head(&self) -> Option<Index> {
286        let data_head = self.nodes.head().next;
287        if data_head == TAIL {
288            None
289        } else {
290            Some(data_head)
291        }
292    }
293
294    /// Get the tail of the list
295    pub fn tail(&self) -> Option<Index> {
296        let data_tail = self.nodes.tail().prev;
297        if data_tail == HEAD {
298            None
299        } else {
300            Some(data_tail)
301        }
302    }
303
304    /// Iterate over the list
305    pub fn iter(&self) -> LinkedListIter<'_> {
306        LinkedListIter {
307            list: self,
308            head: HEAD,
309            tail: TAIL,
310            len: self.len(),
311        }
312    }
313}
314
315/// The iter over the list
316pub struct LinkedListIter<'a> {
317    list: &'a LinkedList,
318    head: Index,
319    tail: Index,
320    len: usize,
321}
322
323impl<'a> Iterator for LinkedListIter<'a> {
324    type Item = &'a u64;
325
326    fn next(&mut self) -> Option<Self::Item> {
327        let next_index = self.list.next(self.head);
328        if next_index == TAIL || next_index == NULL {
329            None
330        } else {
331            self.head = next_index;
332            self.len -= 1;
333            Some(self.list.peek_unchecked(next_index))
334        }
335    }
336
337    fn size_hint(&self) -> (usize, Option<usize>) {
338        (self.len, Some(self.len))
339    }
340}
341
342impl<'a> DoubleEndedIterator for LinkedListIter<'a> {
343    fn next_back(&mut self) -> Option<Self::Item> {
344        let prev_index = self.list.prev(self.tail);
345        if prev_index == HEAD || prev_index == NULL {
346            None
347        } else {
348            self.tail = prev_index;
349            self.len -= 1;
350            Some(self.list.peek_unchecked(prev_index))
351        }
352    }
353}
354
355#[cfg(test)]
356mod test {
357    use super::*;
358
359    // assert the list is the same as `values`
360    fn assert_list(list: &LinkedList, values: &[u64]) {
361        let list_values: Vec<_> = list.iter().copied().collect();
362        assert_eq!(values, &list_values)
363    }
364
365    fn assert_list_reverse(list: &LinkedList, values: &[u64]) {
366        let list_values: Vec<_> = list.iter().rev().copied().collect();
367        assert_eq!(values, &list_values)
368    }
369
370    #[test]
371    fn test_insert() {
372        let mut list = LinkedList::with_capacity(10);
373        assert_eq!(list.len(), 0);
374        assert!(list.node(2).is_none());
375        assert_eq!(list.head(), None);
376        assert_eq!(list.tail(), None);
377
378        let index1 = list.push_head(2);
379        assert_eq!(list.len(), 1);
380        assert_eq!(list.peek(index1).unwrap(), 2);
381
382        let index2 = list.push_head(3);
383        assert_eq!(list.head(), Some(index2));
384        assert_eq!(list.tail(), Some(index1));
385
386        let index3 = list.push_tail(4);
387        assert_eq!(list.head(), Some(index2));
388        assert_eq!(list.tail(), Some(index3));
389
390        assert_list(&list, &[3, 2, 4]);
391        assert_list_reverse(&list, &[4, 2, 3]);
392    }
393
394    #[test]
395    fn test_pop() {
396        let mut list = LinkedList::with_capacity(10);
397        list.push_head(2);
398        list.push_head(3);
399        list.push_tail(4);
400        assert_list(&list, &[3, 2, 4]);
401        assert_eq!(list.pop_tail(), Some(4));
402        assert_eq!(list.pop_tail(), Some(2));
403        assert_eq!(list.pop_tail(), Some(3));
404        assert_eq!(list.pop_tail(), None);
405    }
406
407    #[test]
408    fn test_promote() {
409        let mut list = LinkedList::with_capacity(10);
410        let index2 = list.push_head(2);
411        let index3 = list.push_head(3);
412        let index4 = list.push_tail(4);
413        assert_list(&list, &[3, 2, 4]);
414
415        list.promote(index3);
416        assert_list(&list, &[3, 2, 4]);
417
418        list.promote(index2);
419        assert_list(&list, &[2, 3, 4]);
420
421        list.promote(index4);
422        assert_list(&list, &[4, 2, 3]);
423    }
424
425    #[test]
426    fn test_exist_near_head() {
427        let mut list = LinkedList::with_capacity(10);
428        list.push_head(2);
429        list.push_head(3);
430        list.push_tail(4);
431        assert_list(&list, &[3, 2, 4]);
432
433        assert!(!list.exist_near_head(4, 1));
434        assert!(!list.exist_near_head(4, 2));
435        assert!(list.exist_near_head(4, 3));
436        assert!(list.exist_near_head(4, 4));
437        assert!(list.exist_near_head(4, 99999));
438    }
439}