lock_db/deadlock.rs
1//! Wait-for graph, deadlock detection, and victim selection.
2//!
3//! When a transaction cannot get a lock because another transaction holds it,
4//! the first *waits for* the second. Draw an edge from every waiter to every
5//! transaction it is blocked by and you get the **wait-for graph**. A cycle in
6//! that graph is a deadlock: a set of transactions each waiting for the next,
7//! none able to proceed. The only way out is to abort one of them — the
8//! *victim* — so the rest can continue.
9//!
10//! [`WaitForGraph`] holds the edges and finds cycles; [`pick_victim`] chooses
11//! which member of a cycle to abort. The graph is pure data — no locks, no
12//! threads — which keeps the detection logic small enough to test exhaustively.
13//! [`LockManager`](crate::LockManager) builds one of these from its live wait
14//! set, recomputed from the current lock table, so detection never acts on a
15//! stale edge and so never aborts a transaction that is not actually deadlocked.
16//!
17//! [`pick_victim`]: WaitForGraph::pick_victim
18
19use std::collections::HashMap;
20
21use crate::TxnId;
22
23/// How to choose which transaction in a deadlock cycle to abort.
24///
25/// Both policies break the cycle correctly; they differ only in which member
26/// pays. Transaction ids are taken as a proxy for age — a larger id is a
27/// transaction that started later.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
29#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
30pub enum VictimPolicy {
31 /// Abort the youngest transaction in the cycle (the largest [`TxnId`]).
32 ///
33 /// The default. Aborting the most recently started transaction tends to
34 /// waste the least already-done work.
35 #[default]
36 Youngest,
37
38 /// Abort the oldest transaction in the cycle (the smallest [`TxnId`]).
39 ///
40 /// Useful when the oldest transaction is the one most likely to be stuck or
41 /// holding the most locks.
42 Oldest,
43}
44
45/// A detected deadlock: the cycle of transactions and the one chosen to abort.
46#[derive(Debug, Clone, PartialEq, Eq)]
47pub struct Deadlock {
48 /// The transaction selected for abort to break the cycle.
49 pub victim: TxnId,
50 /// The transactions forming the cycle, in wait-for order. Each waits for the
51 /// next, and the last waits for the first.
52 pub cycle: Vec<TxnId>,
53}
54
55/// A directed graph of which transactions are waiting for which.
56///
57/// An edge `a -> b` means transaction `a` is blocked waiting for a lock held by
58/// transaction `b`. The graph is a plain adjacency map; build it, then call
59/// [`detect_cycle`](Self::detect_cycle) or [`cycle_from`](Self::cycle_from) to
60/// find a deadlock.
61///
62/// # Examples
63///
64/// ```
65/// use lock_db::{TxnId, WaitForGraph, VictimPolicy};
66///
67/// let mut g = WaitForGraph::new();
68/// // T1 waits for T2, T2 waits for T1: a deadlock.
69/// g.add_wait(TxnId::new(1), TxnId::new(2));
70/// g.add_wait(TxnId::new(2), TxnId::new(1));
71///
72/// let cycle = g.detect_cycle().expect("a cycle exists");
73/// assert_eq!(cycle.len(), 2);
74/// assert_eq!(WaitForGraph::pick_victim(&cycle, VictimPolicy::Youngest), Some(TxnId::new(2)));
75/// ```
76#[derive(Debug, Clone, Default)]
77pub struct WaitForGraph {
78 edges: HashMap<TxnId, Vec<TxnId>>,
79}
80
81impl WaitForGraph {
82 /// Creates an empty graph.
83 #[must_use]
84 pub fn new() -> Self {
85 Self {
86 edges: HashMap::new(),
87 }
88 }
89
90 /// Records that `waiter` is blocked waiting for a lock held by `holder`.
91 ///
92 /// A self-edge (`waiter == holder`) is ignored: a transaction cannot
93 /// deadlock against itself.
94 ///
95 /// # Examples
96 ///
97 /// ```
98 /// use lock_db::{TxnId, WaitForGraph};
99 ///
100 /// let mut g = WaitForGraph::new();
101 /// g.add_wait(TxnId::new(1), TxnId::new(2));
102 /// assert_eq!(g.waiter_count(), 1);
103 /// g.add_wait(TxnId::new(3), TxnId::new(3)); // self-edge ignored
104 /// assert_eq!(g.waiter_count(), 1);
105 /// ```
106 pub fn add_wait(&mut self, waiter: TxnId, holder: TxnId) {
107 if waiter == holder {
108 return;
109 }
110 self.edges.entry(waiter).or_default().push(holder);
111 }
112
113 /// Records that `waiter` is blocked by every transaction in `holders`.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// use lock_db::{TxnId, WaitForGraph};
119 ///
120 /// let mut g = WaitForGraph::new();
121 /// g.add_waits(TxnId::new(1), &[TxnId::new(2), TxnId::new(3)]);
122 /// // No cycle: 1 waits for 2 and 3, neither waits back.
123 /// assert!(g.detect_cycle().is_none());
124 /// ```
125 pub fn add_waits(&mut self, waiter: TxnId, holders: &[TxnId]) {
126 for &holder in holders {
127 self.add_wait(waiter, holder);
128 }
129 }
130
131 /// Removes every edge originating at `waiter` (it stopped waiting).
132 pub fn clear_waiter(&mut self, waiter: TxnId) {
133 let _ = self.edges.remove(&waiter);
134 }
135
136 /// Removes `txn` from the graph entirely — both its own waits and every edge
137 /// pointing at it.
138 pub fn remove_txn(&mut self, txn: TxnId) {
139 let _ = self.edges.remove(&txn);
140 for holders in self.edges.values_mut() {
141 holders.retain(|h| *h != txn);
142 }
143 }
144
145 /// Returns `true` if no transaction is waiting.
146 #[must_use]
147 pub fn is_empty(&self) -> bool {
148 self.edges.is_empty()
149 }
150
151 /// Returns the number of transactions that are waiting (have outgoing edges).
152 #[must_use]
153 pub fn waiter_count(&self) -> usize {
154 self.edges.len()
155 }
156
157 /// Returns a cycle in the graph if one exists, or `None`.
158 ///
159 /// The returned vector lists the transactions of one cycle in wait-for
160 /// order. When several cycles exist, which one is returned is unspecified.
161 ///
162 /// # Examples
163 ///
164 /// ```
165 /// use lock_db::{TxnId, WaitForGraph};
166 ///
167 /// let mut g = WaitForGraph::new();
168 /// // A chain, no cycle.
169 /// g.add_wait(TxnId::new(1), TxnId::new(2));
170 /// g.add_wait(TxnId::new(2), TxnId::new(3));
171 /// assert!(g.detect_cycle().is_none());
172 ///
173 /// // Close the loop.
174 /// g.add_wait(TxnId::new(3), TxnId::new(1));
175 /// assert_eq!(g.detect_cycle().map(|c| c.len()), Some(3));
176 /// ```
177 #[must_use]
178 pub fn detect_cycle(&self) -> Option<Vec<TxnId>> {
179 for &start in self.edges.keys() {
180 if let Some(cycle) = self.cycle_from(start) {
181 return Some(cycle);
182 }
183 }
184 None
185 }
186
187 /// Returns a cycle reachable from `start` if one exists, or `None`.
188 ///
189 /// Used for detection at the moment a new wait is added: the only cycle that
190 /// can have just formed is one reachable from the transaction that added the
191 /// edge.
192 ///
193 /// # Examples
194 ///
195 /// ```
196 /// use lock_db::{TxnId, WaitForGraph};
197 ///
198 /// let mut g = WaitForGraph::new();
199 /// g.add_wait(TxnId::new(1), TxnId::new(2));
200 /// g.add_wait(TxnId::new(2), TxnId::new(1));
201 /// assert!(g.cycle_from(TxnId::new(1)).is_some());
202 /// assert!(g.cycle_from(TxnId::new(9)).is_none()); // unknown txn
203 /// ```
204 #[must_use]
205 pub fn cycle_from(&self, start: TxnId) -> Option<Vec<TxnId>> {
206 // Iterative depth-first search. `state`: 0 unvisited, 1 on the current
207 // path, 2 fully explored. A back edge to a node still on the path is a
208 // cycle. Iterative rather than recursive so a long wait chain cannot
209 // overflow the stack.
210 let mut state: HashMap<TxnId, u8> = HashMap::new();
211 let mut path: Vec<TxnId> = Vec::new();
212 let mut stack: Vec<(TxnId, usize)> = Vec::new();
213
214 let _ = state.insert(start, 1);
215 path.push(start);
216 stack.push((start, 0));
217
218 while let Some(&(node, idx)) = stack.last() {
219 let neighbors: &[TxnId] = self.edges.get(&node).map_or(&[], Vec::as_slice);
220 if idx < neighbors.len() {
221 if let Some(top) = stack.last_mut() {
222 top.1 += 1;
223 }
224 let next = neighbors[idx];
225 match state.get(&next).copied().unwrap_or(0) {
226 1 => {
227 if let Some(pos) = path.iter().position(|t| *t == next) {
228 return Some(path[pos..].to_vec());
229 }
230 }
231 0 => {
232 let _ = state.insert(next, 1);
233 path.push(next);
234 stack.push((next, 0));
235 }
236 _ => {}
237 }
238 } else {
239 let _ = state.insert(node, 2);
240 let _ = path.pop();
241 let _ = stack.pop();
242 }
243 }
244 None
245 }
246
247 /// Chooses the transaction to abort from a deadlock cycle.
248 ///
249 /// Returns `None` only for an empty slice. See [`VictimPolicy`] for how the
250 /// choice is made.
251 ///
252 /// # Examples
253 ///
254 /// ```
255 /// use lock_db::{TxnId, VictimPolicy, WaitForGraph};
256 ///
257 /// let cycle = [TxnId::new(3), TxnId::new(7), TxnId::new(5)];
258 /// assert_eq!(WaitForGraph::pick_victim(&cycle, VictimPolicy::Youngest), Some(TxnId::new(7)));
259 /// assert_eq!(WaitForGraph::pick_victim(&cycle, VictimPolicy::Oldest), Some(TxnId::new(3)));
260 /// ```
261 #[must_use]
262 pub fn pick_victim(cycle: &[TxnId], policy: VictimPolicy) -> Option<TxnId> {
263 match policy {
264 VictimPolicy::Youngest => cycle.iter().copied().max(),
265 VictimPolicy::Oldest => cycle.iter().copied().min(),
266 }
267 }
268}
269
270#[cfg(test)]
271#[allow(clippy::unwrap_used)]
272mod tests {
273 use super::{VictimPolicy, WaitForGraph};
274 use crate::TxnId;
275
276 fn t(id: u64) -> TxnId {
277 TxnId::new(id)
278 }
279
280 #[test]
281 fn test_empty_graph_has_no_cycle() {
282 let g = WaitForGraph::new();
283 assert!(g.is_empty());
284 assert!(g.detect_cycle().is_none());
285 }
286
287 #[test]
288 fn test_self_edge_ignored() {
289 let mut g = WaitForGraph::new();
290 g.add_wait(t(1), t(1));
291 assert!(g.is_empty());
292 assert!(g.detect_cycle().is_none());
293 }
294
295 #[test]
296 fn test_chain_has_no_cycle() {
297 let mut g = WaitForGraph::new();
298 g.add_wait(t(1), t(2));
299 g.add_wait(t(2), t(3));
300 g.add_wait(t(3), t(4));
301 assert!(g.detect_cycle().is_none());
302 }
303
304 #[test]
305 fn test_two_cycle_detected() {
306 let mut g = WaitForGraph::new();
307 g.add_wait(t(1), t(2));
308 g.add_wait(t(2), t(1));
309 let cycle = g.detect_cycle().unwrap();
310 assert_eq!(cycle.len(), 2);
311 assert!(cycle.contains(&t(1)) && cycle.contains(&t(2)));
312 }
313
314 #[test]
315 fn test_three_cycle_detected() {
316 let mut g = WaitForGraph::new();
317 g.add_wait(t(1), t(2));
318 g.add_wait(t(2), t(3));
319 g.add_wait(t(3), t(1));
320 let cycle = g.detect_cycle().unwrap();
321 assert_eq!(cycle.len(), 3);
322 }
323
324 #[test]
325 fn test_cycle_from_unknown_txn_is_none() {
326 let mut g = WaitForGraph::new();
327 g.add_wait(t(1), t(2));
328 g.add_wait(t(2), t(1));
329 assert!(g.cycle_from(t(99)).is_none());
330 }
331
332 #[test]
333 fn test_cycle_from_finds_cycle_containing_start() {
334 let mut g = WaitForGraph::new();
335 g.add_wait(t(1), t(2));
336 g.add_wait(t(2), t(3));
337 g.add_wait(t(3), t(2)); // cycle 2<->3, reachable from 1
338 let cycle = g.cycle_from(t(1)).unwrap();
339 assert!(cycle.contains(&t(2)) && cycle.contains(&t(3)));
340 assert!(!cycle.contains(&t(1))); // 1 is not in the cycle
341 }
342
343 #[test]
344 fn test_clear_waiter_breaks_cycle() {
345 let mut g = WaitForGraph::new();
346 g.add_wait(t(1), t(2));
347 g.add_wait(t(2), t(1));
348 g.clear_waiter(t(1));
349 assert!(g.detect_cycle().is_none());
350 }
351
352 #[test]
353 fn test_remove_txn_drops_incoming_and_outgoing() {
354 let mut g = WaitForGraph::new();
355 g.add_wait(t(1), t(2));
356 g.add_wait(t(2), t(1));
357 g.add_wait(t(3), t(2));
358 g.remove_txn(t(2));
359 assert!(g.detect_cycle().is_none());
360 // t(3)'s edge to the removed t(2) is gone.
361 assert!(g.cycle_from(t(3)).is_none());
362 }
363
364 #[test]
365 fn test_diamond_no_cycle() {
366 // 1->2, 1->3, 2->4, 3->4: a DAG, no cycle.
367 let mut g = WaitForGraph::new();
368 g.add_wait(t(1), t(2));
369 g.add_wait(t(1), t(3));
370 g.add_wait(t(2), t(4));
371 g.add_wait(t(3), t(4));
372 assert!(g.detect_cycle().is_none());
373 }
374
375 #[test]
376 fn test_pick_victim_policies() {
377 let cycle = [t(3), t(7), t(5)];
378 assert_eq!(
379 WaitForGraph::pick_victim(&cycle, VictimPolicy::Youngest),
380 Some(t(7))
381 );
382 assert_eq!(
383 WaitForGraph::pick_victim(&cycle, VictimPolicy::Oldest),
384 Some(t(3))
385 );
386 assert_eq!(WaitForGraph::pick_victim(&[], VictimPolicy::Youngest), None);
387 }
388
389 #[test]
390 fn test_detected_cycle_is_an_actual_cycle() {
391 // Every consecutive pair in the returned cycle must be a real edge, and
392 // the last must wait for the first.
393 let mut g = WaitForGraph::new();
394 g.add_wait(t(1), t(2));
395 g.add_wait(t(2), t(3));
396 g.add_wait(t(3), t(1));
397 let cycle = g.detect_cycle().unwrap();
398 for i in 0..cycle.len() {
399 let from = cycle[i];
400 let to = cycle[(i + 1) % cycle.len()];
401 let edges = g.edges.get(&from).unwrap();
402 assert!(edges.contains(&to), "missing edge {from:?} -> {to:?}");
403 }
404 }
405
406 #[test]
407 fn test_default_policy_is_youngest() {
408 assert_eq!(VictimPolicy::default(), VictimPolicy::Youngest);
409 }
410}