Skip to main content

lock_db/
deadlock.rs

1//! Wait-for graph, deadlock detection, and victim selection.
2//!
3//! When a transaction cannot get a lock because another transaction holds it,
4//! the first *waits for* the second. Draw an edge from every waiter to every
5//! transaction it is blocked by and you get the **wait-for graph**. A cycle in
6//! that graph is a deadlock: a set of transactions each waiting for the next,
7//! none able to proceed. The only way out is to abort one of them — the
8//! *victim* — so the rest can continue.
9//!
10//! [`WaitForGraph`] holds the edges and finds cycles; [`pick_victim`] chooses
11//! which member of a cycle to abort. The graph is pure data — no locks, no
12//! threads — which keeps the detection logic small enough to test exhaustively.
13//! [`LockManager`](crate::LockManager) builds one of these from its live wait
14//! set, recomputed from the current lock table, so detection never acts on a
15//! stale edge and so never aborts a transaction that is not actually deadlocked.
16//!
17//! [`pick_victim`]: WaitForGraph::pick_victim
18
19use std::collections::HashMap;
20
21use crate::TxnId;
22
23/// How to choose which transaction in a deadlock cycle to abort.
24///
25/// Both policies break the cycle correctly; they differ only in which member
26/// pays. Transaction ids are taken as a proxy for age — a larger id is a
27/// transaction that started later.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
29#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
30pub enum VictimPolicy {
31    /// Abort the youngest transaction in the cycle (the largest [`TxnId`]).
32    ///
33    /// The default. Aborting the most recently started transaction tends to
34    /// waste the least already-done work.
35    #[default]
36    Youngest,
37
38    /// Abort the oldest transaction in the cycle (the smallest [`TxnId`]).
39    ///
40    /// Useful when the oldest transaction is the one most likely to be stuck or
41    /// holding the most locks.
42    Oldest,
43}
44
45/// A detected deadlock: the cycle of transactions and the one chosen to abort.
46#[derive(Debug, Clone, PartialEq, Eq)]
47pub struct Deadlock {
48    /// The transaction selected for abort to break the cycle.
49    pub victim: TxnId,
50    /// The transactions forming the cycle, in wait-for order. Each waits for the
51    /// next, and the last waits for the first.
52    pub cycle: Vec<TxnId>,
53}
54
55/// A directed graph of which transactions are waiting for which.
56///
57/// An edge `a -> b` means transaction `a` is blocked waiting for a lock held by
58/// transaction `b`. The graph is a plain adjacency map; build it, then call
59/// [`detect_cycle`](Self::detect_cycle) or [`cycle_from`](Self::cycle_from) to
60/// find a deadlock.
61///
62/// # Examples
63///
64/// ```
65/// use lock_db::{TxnId, WaitForGraph, VictimPolicy};
66///
67/// let mut g = WaitForGraph::new();
68/// // T1 waits for T2, T2 waits for T1: a deadlock.
69/// g.add_wait(TxnId::new(1), TxnId::new(2));
70/// g.add_wait(TxnId::new(2), TxnId::new(1));
71///
72/// let cycle = g.detect_cycle().expect("a cycle exists");
73/// assert_eq!(cycle.len(), 2);
74/// assert_eq!(WaitForGraph::pick_victim(&cycle, VictimPolicy::Youngest), Some(TxnId::new(2)));
75/// ```
76#[derive(Debug, Clone, Default)]
77pub struct WaitForGraph {
78    edges: HashMap<TxnId, Vec<TxnId>>,
79}
80
81impl WaitForGraph {
82    /// Creates an empty graph.
83    #[must_use]
84    pub fn new() -> Self {
85        Self {
86            edges: HashMap::new(),
87        }
88    }
89
90    /// Records that `waiter` is blocked waiting for a lock held by `holder`.
91    ///
92    /// A self-edge (`waiter == holder`) is ignored: a transaction cannot
93    /// deadlock against itself.
94    ///
95    /// # Examples
96    ///
97    /// ```
98    /// use lock_db::{TxnId, WaitForGraph};
99    ///
100    /// let mut g = WaitForGraph::new();
101    /// g.add_wait(TxnId::new(1), TxnId::new(2));
102    /// assert_eq!(g.waiter_count(), 1);
103    /// g.add_wait(TxnId::new(3), TxnId::new(3)); // self-edge ignored
104    /// assert_eq!(g.waiter_count(), 1);
105    /// ```
106    pub fn add_wait(&mut self, waiter: TxnId, holder: TxnId) {
107        if waiter == holder {
108            return;
109        }
110        self.edges.entry(waiter).or_default().push(holder);
111    }
112
113    /// Records that `waiter` is blocked by every transaction in `holders`.
114    ///
115    /// # Examples
116    ///
117    /// ```
118    /// use lock_db::{TxnId, WaitForGraph};
119    ///
120    /// let mut g = WaitForGraph::new();
121    /// g.add_waits(TxnId::new(1), &[TxnId::new(2), TxnId::new(3)]);
122    /// // No cycle: 1 waits for 2 and 3, neither waits back.
123    /// assert!(g.detect_cycle().is_none());
124    /// ```
125    pub fn add_waits(&mut self, waiter: TxnId, holders: &[TxnId]) {
126        for &holder in holders {
127            self.add_wait(waiter, holder);
128        }
129    }
130
131    /// Removes every edge originating at `waiter` (it stopped waiting).
132    pub fn clear_waiter(&mut self, waiter: TxnId) {
133        let _ = self.edges.remove(&waiter);
134    }
135
136    /// Removes `txn` from the graph entirely — both its own waits and every edge
137    /// pointing at it.
138    pub fn remove_txn(&mut self, txn: TxnId) {
139        let _ = self.edges.remove(&txn);
140        for holders in self.edges.values_mut() {
141            holders.retain(|h| *h != txn);
142        }
143    }
144
145    /// Returns `true` if no transaction is waiting.
146    #[must_use]
147    pub fn is_empty(&self) -> bool {
148        self.edges.is_empty()
149    }
150
151    /// Returns the number of transactions that are waiting (have outgoing edges).
152    #[must_use]
153    pub fn waiter_count(&self) -> usize {
154        self.edges.len()
155    }
156
157    /// Returns a cycle in the graph if one exists, or `None`.
158    ///
159    /// The returned vector lists the transactions of one cycle in wait-for
160    /// order. When several cycles exist, which one is returned is unspecified.
161    ///
162    /// # Examples
163    ///
164    /// ```
165    /// use lock_db::{TxnId, WaitForGraph};
166    ///
167    /// let mut g = WaitForGraph::new();
168    /// // A chain, no cycle.
169    /// g.add_wait(TxnId::new(1), TxnId::new(2));
170    /// g.add_wait(TxnId::new(2), TxnId::new(3));
171    /// assert!(g.detect_cycle().is_none());
172    ///
173    /// // Close the loop.
174    /// g.add_wait(TxnId::new(3), TxnId::new(1));
175    /// assert_eq!(g.detect_cycle().map(|c| c.len()), Some(3));
176    /// ```
177    #[must_use]
178    pub fn detect_cycle(&self) -> Option<Vec<TxnId>> {
179        for &start in self.edges.keys() {
180            if let Some(cycle) = self.cycle_from(start) {
181                return Some(cycle);
182            }
183        }
184        None
185    }
186
187    /// Returns a cycle reachable from `start` if one exists, or `None`.
188    ///
189    /// Used for detection at the moment a new wait is added: the only cycle that
190    /// can have just formed is one reachable from the transaction that added the
191    /// edge.
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// use lock_db::{TxnId, WaitForGraph};
197    ///
198    /// let mut g = WaitForGraph::new();
199    /// g.add_wait(TxnId::new(1), TxnId::new(2));
200    /// g.add_wait(TxnId::new(2), TxnId::new(1));
201    /// assert!(g.cycle_from(TxnId::new(1)).is_some());
202    /// assert!(g.cycle_from(TxnId::new(9)).is_none()); // unknown txn
203    /// ```
204    #[must_use]
205    pub fn cycle_from(&self, start: TxnId) -> Option<Vec<TxnId>> {
206        // Iterative depth-first search. `state`: 0 unvisited, 1 on the current
207        // path, 2 fully explored. A back edge to a node still on the path is a
208        // cycle. Iterative rather than recursive so a long wait chain cannot
209        // overflow the stack.
210        let mut state: HashMap<TxnId, u8> = HashMap::new();
211        let mut path: Vec<TxnId> = Vec::new();
212        let mut stack: Vec<(TxnId, usize)> = Vec::new();
213
214        let _ = state.insert(start, 1);
215        path.push(start);
216        stack.push((start, 0));
217
218        while let Some(&(node, idx)) = stack.last() {
219            let neighbors: &[TxnId] = self.edges.get(&node).map_or(&[], Vec::as_slice);
220            if idx < neighbors.len() {
221                if let Some(top) = stack.last_mut() {
222                    top.1 += 1;
223                }
224                let next = neighbors[idx];
225                match state.get(&next).copied().unwrap_or(0) {
226                    1 => {
227                        if let Some(pos) = path.iter().position(|t| *t == next) {
228                            return Some(path[pos..].to_vec());
229                        }
230                    }
231                    0 => {
232                        let _ = state.insert(next, 1);
233                        path.push(next);
234                        stack.push((next, 0));
235                    }
236                    _ => {}
237                }
238            } else {
239                let _ = state.insert(node, 2);
240                let _ = path.pop();
241                let _ = stack.pop();
242            }
243        }
244        None
245    }
246
247    /// Chooses the transaction to abort from a deadlock cycle.
248    ///
249    /// Returns `None` only for an empty slice. See [`VictimPolicy`] for how the
250    /// choice is made.
251    ///
252    /// # Examples
253    ///
254    /// ```
255    /// use lock_db::{TxnId, VictimPolicy, WaitForGraph};
256    ///
257    /// let cycle = [TxnId::new(3), TxnId::new(7), TxnId::new(5)];
258    /// assert_eq!(WaitForGraph::pick_victim(&cycle, VictimPolicy::Youngest), Some(TxnId::new(7)));
259    /// assert_eq!(WaitForGraph::pick_victim(&cycle, VictimPolicy::Oldest), Some(TxnId::new(3)));
260    /// ```
261    #[must_use]
262    pub fn pick_victim(cycle: &[TxnId], policy: VictimPolicy) -> Option<TxnId> {
263        match policy {
264            VictimPolicy::Youngest => cycle.iter().copied().max(),
265            VictimPolicy::Oldest => cycle.iter().copied().min(),
266        }
267    }
268}
269
270#[cfg(test)]
271#[allow(clippy::unwrap_used)]
272mod tests {
273    use super::{VictimPolicy, WaitForGraph};
274    use crate::TxnId;
275
276    fn t(id: u64) -> TxnId {
277        TxnId::new(id)
278    }
279
280    #[test]
281    fn test_empty_graph_has_no_cycle() {
282        let g = WaitForGraph::new();
283        assert!(g.is_empty());
284        assert!(g.detect_cycle().is_none());
285    }
286
287    #[test]
288    fn test_self_edge_ignored() {
289        let mut g = WaitForGraph::new();
290        g.add_wait(t(1), t(1));
291        assert!(g.is_empty());
292        assert!(g.detect_cycle().is_none());
293    }
294
295    #[test]
296    fn test_chain_has_no_cycle() {
297        let mut g = WaitForGraph::new();
298        g.add_wait(t(1), t(2));
299        g.add_wait(t(2), t(3));
300        g.add_wait(t(3), t(4));
301        assert!(g.detect_cycle().is_none());
302    }
303
304    #[test]
305    fn test_two_cycle_detected() {
306        let mut g = WaitForGraph::new();
307        g.add_wait(t(1), t(2));
308        g.add_wait(t(2), t(1));
309        let cycle = g.detect_cycle().unwrap();
310        assert_eq!(cycle.len(), 2);
311        assert!(cycle.contains(&t(1)) && cycle.contains(&t(2)));
312    }
313
314    #[test]
315    fn test_three_cycle_detected() {
316        let mut g = WaitForGraph::new();
317        g.add_wait(t(1), t(2));
318        g.add_wait(t(2), t(3));
319        g.add_wait(t(3), t(1));
320        let cycle = g.detect_cycle().unwrap();
321        assert_eq!(cycle.len(), 3);
322    }
323
324    #[test]
325    fn test_cycle_from_unknown_txn_is_none() {
326        let mut g = WaitForGraph::new();
327        g.add_wait(t(1), t(2));
328        g.add_wait(t(2), t(1));
329        assert!(g.cycle_from(t(99)).is_none());
330    }
331
332    #[test]
333    fn test_cycle_from_finds_cycle_containing_start() {
334        let mut g = WaitForGraph::new();
335        g.add_wait(t(1), t(2));
336        g.add_wait(t(2), t(3));
337        g.add_wait(t(3), t(2)); // cycle 2<->3, reachable from 1
338        let cycle = g.cycle_from(t(1)).unwrap();
339        assert!(cycle.contains(&t(2)) && cycle.contains(&t(3)));
340        assert!(!cycle.contains(&t(1))); // 1 is not in the cycle
341    }
342
343    #[test]
344    fn test_clear_waiter_breaks_cycle() {
345        let mut g = WaitForGraph::new();
346        g.add_wait(t(1), t(2));
347        g.add_wait(t(2), t(1));
348        g.clear_waiter(t(1));
349        assert!(g.detect_cycle().is_none());
350    }
351
352    #[test]
353    fn test_remove_txn_drops_incoming_and_outgoing() {
354        let mut g = WaitForGraph::new();
355        g.add_wait(t(1), t(2));
356        g.add_wait(t(2), t(1));
357        g.add_wait(t(3), t(2));
358        g.remove_txn(t(2));
359        assert!(g.detect_cycle().is_none());
360        // t(3)'s edge to the removed t(2) is gone.
361        assert!(g.cycle_from(t(3)).is_none());
362    }
363
364    #[test]
365    fn test_diamond_no_cycle() {
366        // 1->2, 1->3, 2->4, 3->4: a DAG, no cycle.
367        let mut g = WaitForGraph::new();
368        g.add_wait(t(1), t(2));
369        g.add_wait(t(1), t(3));
370        g.add_wait(t(2), t(4));
371        g.add_wait(t(3), t(4));
372        assert!(g.detect_cycle().is_none());
373    }
374
375    #[test]
376    fn test_pick_victim_policies() {
377        let cycle = [t(3), t(7), t(5)];
378        assert_eq!(
379            WaitForGraph::pick_victim(&cycle, VictimPolicy::Youngest),
380            Some(t(7))
381        );
382        assert_eq!(
383            WaitForGraph::pick_victim(&cycle, VictimPolicy::Oldest),
384            Some(t(3))
385        );
386        assert_eq!(WaitForGraph::pick_victim(&[], VictimPolicy::Youngest), None);
387    }
388
389    #[test]
390    fn test_detected_cycle_is_an_actual_cycle() {
391        // Every consecutive pair in the returned cycle must be a real edge, and
392        // the last must wait for the first.
393        let mut g = WaitForGraph::new();
394        g.add_wait(t(1), t(2));
395        g.add_wait(t(2), t(3));
396        g.add_wait(t(3), t(1));
397        let cycle = g.detect_cycle().unwrap();
398        for i in 0..cycle.len() {
399            let from = cycle[i];
400            let to = cycle[(i + 1) % cycle.len()];
401            let edges = g.edges.get(&from).unwrap();
402            assert!(edges.contains(&to), "missing edge {from:?} -> {to:?}");
403        }
404    }
405
406    #[test]
407    fn test_default_policy_is_youngest() {
408        assert_eq!(VictimPolicy::default(), VictimPolicy::Youngest);
409    }
410}