small_db/btree/table/delete.rs
1use std::{
2 cmp,
3 io::Write,
4 ops::DerefMut,
5 sync::{Arc, RwLock},
6 usize,
7};
8
9use crate::{
10 btree::{
11 page::{
12 BTreeHeaderPage, BTreeInternalPage,
13 BTreeInternalPageIterator, BTreeLeafPage,
14 BTreeLeafPageIterator, BTreePage, BTreePageID, Entry,
15 PageCategory,
16 },
17 tuple::WrappedTuple,
18 },
19 concurrent_status::Permission,
20 error::SmallError,
21 field::IntField,
22 transaction::Transaction,
23 types::SmallResult,
24 utils::HandyRwLock,
25 BTreeTable, Unique,
26};
27
28/// delete-related methods
29impl BTreeTable {
30 /// Delete a tuple from this BTreeFile.
31 ///
32 /// May cause pages to merge or redistribute entries/tuples if the
33 /// pages become less than half full.
34 pub fn delete_tuple(
35 &self,
36 tx: &Transaction,
37 tuple: &WrappedTuple,
38 ) -> SmallResult {
39 let pid = tuple.get_pid();
40 let leaf_rc = Unique::mut_page_cache()
41 .get_leaf_page(tx, Permission::ReadWrite, &pid)
42 .unwrap();
43
44 // hold the leaf page
45 {
46 let mut leaf = leaf_rc.wl();
47 leaf.delete_tuple(tuple.get_slot_number());
48 }
49 // release the leaf page
50
51 if leaf_rc.rl().stable() {
52 return Ok(());
53 } else {
54 return self.handle_erratic_leaf_page(tx, leaf_rc);
55 }
56 }
57
58 /// Handle the case when a leaf page becomes less than half full
59 /// due to deletions.
60 ///
61 /// If one of its siblings has extra tuples, redistribute those
62 /// tuples. Otherwise merge with one of the siblings. Update
63 /// pointers as needed.
64 fn handle_erratic_leaf_page(
65 &self,
66 tx: &Transaction,
67 page_rc: Arc<RwLock<BTreeLeafPage>>,
68 ) -> SmallResult {
69 if page_rc.rl().get_parent_pid().category
70 == PageCategory::RootPointer
71 {
72 return Ok(());
73 }
74
75 let left_pid = page_rc.rl().get_left_pid();
76 let right_pid = page_rc.rl().get_right_pid();
77
78 if let Some(left_pid) = left_pid {
79 let left_rc = Unique::mut_page_cache()
80 .get_leaf_page(tx, Permission::ReadWrite, &left_pid)
81 .unwrap();
82 self.balancing_two_leaf_pages(tx, left_rc, page_rc)?;
83 } else if let Some(right_pid) = right_pid {
84 let right_rc = Unique::mut_page_cache()
85 .get_leaf_page(tx, Permission::ReadWrite, &right_pid)
86 .unwrap();
87 self.balancing_two_leaf_pages(tx, page_rc, right_rc)?;
88 } else {
89 return Err(SmallError::new(
90 "BTreeTable::handle_erratic_leaf_page no left or right sibling",
91 ));
92 };
93
94 return Ok(());
95 }
96
97 /// Handle the case when an internal page becomes less than half
98 /// full due to deletions.
99 ///
100 /// If one of its siblings has extra entries, redistribute those
101 /// entries. Otherwise merge with one of the siblings. Update
102 /// pointers as needed.
103 ///
104 /// # Arguments
105 ///
106 /// - page_rc - the erratic internal page to be handled
107 fn handle_erratic_internal_page(
108 &self,
109 tx: &Transaction,
110 page_rc: Arc<RwLock<BTreeInternalPage>>,
111 ) -> SmallResult {
112 if page_rc.rl().get_parent_pid().category
113 == PageCategory::RootPointer
114 {
115 return Ok(());
116 }
117
118 let left_pid = page_rc.rl().get_left_sibling_pid(tx);
119 let right_pid = page_rc.rl().get_right_sibling_pid(tx);
120 if let Some(left_pid) = left_pid {
121 let left_rc = Unique::mut_page_cache()
122 .get_internal_page(
123 tx,
124 Permission::ReadWrite,
125 &left_pid,
126 )
127 .unwrap();
128 self.balancing_two_internal_pages(tx, left_rc, page_rc)?;
129 } else if let Some(right_pid) = right_pid {
130 let right_rc = Unique::mut_page_cache()
131 .get_internal_page(
132 tx,
133 Permission::ReadWrite,
134 &right_pid,
135 )
136 .unwrap();
137 self.balancing_two_internal_pages(tx, page_rc, right_rc)?;
138 } else {
139 panic!("Cannot find the left/right sibling of the page");
140 }
141
142 Ok(())
143 }
144
145 fn set_parent_pid(
146 &self,
147 tx: &Transaction,
148 child_pid: &BTreePageID,
149 parent_pid: &BTreePageID,
150 ) {
151 match child_pid.category {
152 PageCategory::Leaf => {
153 let child_rc = Unique::mut_page_cache()
154 .get_leaf_page(
155 tx,
156 Permission::ReadWrite,
157 child_pid,
158 )
159 .unwrap();
160 child_rc.wl().set_parent_pid(&parent_pid);
161 }
162 PageCategory::Internal => {
163 let child_rc = Unique::mut_page_cache()
164 .get_internal_page(
165 tx,
166 Permission::ReadOnly,
167 child_pid,
168 )
169 .unwrap();
170 child_rc.wl().set_parent_pid(&parent_pid);
171 }
172 _ => panic!("Invalid page category"),
173 }
174 }
175
176 /// # Arguments
177 ///
178 /// - parent_entry - the entry in the parent corresponding to the
179 /// left and right
180 fn merge_internal_page(
181 &self,
182 tx: &Transaction,
183 left_rc: Arc<RwLock<BTreeInternalPage>>,
184 right_rc: Arc<RwLock<BTreeInternalPage>>,
185 parent_rc: Arc<RwLock<BTreeInternalPage>>,
186 parent_entry: &Entry,
187 ) -> SmallResult {
188 // hold left_rc and right_rc
189 {
190 let mut left = left_rc.wl();
191 let mut right = right_rc.wl();
192
193 // stage 1: pull down the edge entry from parent and
194 // insert it into target page
195 let edge_entry = Entry::new(
196 parent_entry.get_key(),
197 &left.get_last_child_pid(),
198 &right.get_first_child_pid(),
199 );
200 self.set_parent_pid(
201 tx,
202 &right.get_first_child_pid(),
203 &left.get_pid(),
204 );
205 left.insert_entry(&edge_entry)?;
206
207 // stage 2: move the entries from the one page to the
208 // other
209 let mut deleted_indexes = Vec::new();
210 let iter = BTreeInternalPageIterator::new(&right);
211 for e in iter {
212 left.insert_entry(&e)?;
213 self.set_parent_pid(
214 tx,
215 &e.get_right_child(),
216 &left.get_pid(),
217 );
218 deleted_indexes.push(e.get_record_id());
219 }
220 for i in deleted_indexes {
221 right.delete_key_and_right_child(i);
222 }
223
224 // stage 3: set the right as empty
225 self.set_empty_page(tx, &right.get_pid());
226 }
227 // release left_rc and right_rc
228
229 // stage 4: update the entry in parent which points to the
230 // left and right
231 self.delete_parent_entry(
232 tx,
233 left_rc,
234 parent_rc,
235 parent_entry,
236 )?;
237
238 Ok(())
239 }
240
241 /// # Arguments
242 ///
243 /// - entry - the entry in the parent corresponding to the
244 /// left_child and right_child
245 fn merge_leaf_page(
246 &self,
247 tx: &Transaction,
248 left_rc: Arc<RwLock<BTreeLeafPage>>,
249 right_rc: Arc<RwLock<BTreeLeafPage>>,
250 parent_rc: Arc<RwLock<BTreeInternalPage>>,
251 entry: &Entry,
252 ) -> SmallResult {
253 // hold the left and right page
254 {
255 let mut left = left_rc.wl();
256 let mut right = right_rc.wl();
257
258 // stage 1: move the tuples from right to left
259 let mut it = BTreeLeafPageIterator::new(&right);
260 let mut deleted = Vec::new();
261 for t in it.by_ref() {
262 left.insert_tuple(&t);
263 deleted.push(t.get_slot_number());
264 }
265 for slot in deleted {
266 right.delete_tuple(slot);
267 }
268
269 // stage 2: update sibling pointers
270
271 // set the right pointer of the left page to the right
272 // page's right pointer
273 left.set_right_pid(right.get_right_pid());
274
275 // set the left pointer for the newer right page
276 if let Some(newer_right_pid) = right.get_right_pid() {
277 let newer_right_rc = Unique::mut_page_cache()
278 .get_leaf_page(
279 tx,
280 Permission::ReadWrite,
281 &newer_right_pid,
282 )
283 .unwrap();
284 newer_right_rc
285 .wl()
286 .set_left_pid(Some(left.get_pid()));
287 }
288
289 // stage 4: set the right page as empty
290 self.set_empty_page(tx, &right.get_pid());
291 }
292
293 // stage 5: release the left and right page
294 self.delete_parent_entry(tx, left_rc, parent_rc, entry)?;
295
296 Ok(())
297 }
298
299 /// Method to encapsulate the process of deleting an entry
300 /// (specifically the key and right child) from a parent node.
301 ///
302 /// If the parent becomes empty (no keys remaining), that
303 /// indicates that it was the root node and should be replaced
304 /// by its one remaining child.
305 ///
306 /// Otherwise, if it gets below minimum occupancy for non-root
307 /// internal nodes, it should steal from one of its siblings
308 /// or merge with a sibling.
309 ///
310 /// # Arguments
311 ///
312 /// - reserved_child - the child reserved after the key and
313 /// another child are deleted
314 /// - page - the parent containing the entry to be
315 /// deleted
316 /// - entry - the entry to be deleted
317 /// - delete_left_child - which child of the entry should be
318 /// deleted
319 fn delete_parent_entry<PAGE: BTreePage>(
320 &self,
321 tx: &Transaction,
322 left_rc: Arc<RwLock<PAGE>>,
323 parent_rc: Arc<RwLock<BTreeInternalPage>>,
324 entry: &Entry,
325 ) -> SmallResult {
326 // hold the parent and left page
327 {
328 let mut parent = parent_rc.wl();
329 let mut left = left_rc.wl();
330
331 // stage 1: delete the corresponding entry in the parent
332 // page
333 parent.delete_key_and_right_child(entry.get_record_id());
334
335 // stage 2: handle the parent page according to the
336 // following cases case 1: parent is empty,
337 // then the left child is now the new root
338 if parent.entries_count() == 0 {
339 let root_ptr_page_rc = self.get_root_ptr_page(tx);
340
341 // hold the root pointer page
342 {
343 let mut root_ptr_page = root_ptr_page_rc.wl();
344 left.set_parent_pid(&root_ptr_page.get_pid());
345 root_ptr_page.set_root_pid(&left.get_pid());
346 }
347 // release the root pointer page
348
349 // release the page for reuse
350 self.set_empty_page(tx, &parent.get_pid());
351 return Ok(());
352 }
353
354 // case 2: parent is stable, return directly
355 if parent.stable() {
356 return Ok(());
357 }
358 }
359 // release the parent and left page
360
361 // case 3: parent is unstable (erratic), handle it
362 self.handle_erratic_internal_page(tx, parent_rc)?;
363 Ok(())
364 }
365
366 /// Mark a page in this BTreeTable as empty. Find the
367 /// corresponding header page (create it if needed), and mark
368 /// the corresponding slot in the header page as empty.
369 fn set_empty_page(&self, tx: &Transaction, pid: &BTreePageID) {
370 Unique::mut_page_cache().discard_page(pid);
371
372 let root_ptr_rc = self.get_root_ptr_page(tx);
373 let header_rc: Arc<RwLock<BTreeHeaderPage>>;
374
375 // let mut root_ptr = root_ptr_rc.wl();
376 match root_ptr_rc.rl().get_header_pid() {
377 Some(header_pid) => {
378 header_rc = Unique::mut_page_cache()
379 .get_header_page(
380 tx,
381 Permission::ReadWrite,
382 &header_pid,
383 )
384 .unwrap();
385 }
386 None => {
387 // if there are no header pages, create the first
388 // header page and update the header
389 // pointer in the BTreeRootPtrPage
390 header_rc = self.get_empty_header_page(tx);
391 }
392 }
393
394 root_ptr_rc.wl().set_header_pid(&header_rc.rl().get_pid());
395
396 // borrow of header_rc start here
397 {
398 let mut header = header_rc.wl();
399 let slot_index =
400 pid.page_index as usize % header.get_slots_count();
401 header.mark_slot_status(slot_index, false);
402 }
403 // borrow of header_rc end here
404 }
405
406 /// Balancing two internal pages according the situation:
407 ///
408 /// 1. Merge the two pages if the count of entries in the two
409 /// pages is less than the maximum capacity of a single page.
410 ///
411 /// 2. Otherwise, steal entries from the sibling and copy them to
412 /// the given page so that both pages are at least half full.
413 ///
414 /// Keys can be thought of as rotating through the parent entry,
415 /// so the original key in the parent is "pulled down" to the
416 /// erratic page, and the last key in the sibling page is
417 /// "pushed up" to the parent. Update parent pointers as
418 /// needed.
419 fn balancing_two_internal_pages(
420 &self,
421 tx: &Transaction,
422 left_rc: Arc<RwLock<BTreeInternalPage>>,
423 right_rc: Arc<RwLock<BTreeInternalPage>>,
424 ) -> SmallResult {
425 let parent_rc = Unique::mut_page_cache()
426 .get_internal_page(
427 tx,
428 Permission::ReadWrite,
429 &left_rc.rl().get_parent_pid(),
430 )
431 .unwrap();
432 let mut parent_entry = parent_rc
433 .rl()
434 .get_entry_by_children(
435 &left_rc.rl().get_pid(),
436 &right_rc.rl().get_pid(),
437 )
438 .unwrap();
439
440 let left_children = left_rc.rl().children_count();
441 let right_children = right_rc.rl().children_count();
442 if left_children + right_children
443 <= left_rc.rl().get_children_capacity()
444 {
445 // if the two pages can be merged, merge them
446 return self.merge_internal_page(
447 tx,
448 left_rc,
449 right_rc,
450 parent_rc,
451 &parent_entry,
452 );
453 }
454
455 // if there aren't any entries to move, return immediately
456 let move_count = (left_children + right_children) / 2
457 - cmp::min(left_children, right_children);
458 if move_count == 0 {
459 return Ok(());
460 }
461
462 let mut middle_key = parent_entry.get_key();
463
464 // hold the left and right page
465 {
466 let mut left = left_rc.wl();
467 let mut right = right_rc.wl();
468
469 if left_children < right_children {
470 // The edge child of the destination page.
471 let edge_child_pid = left.get_last_child_pid();
472
473 let right_iter =
474 BTreeInternalPageIterator::new(&right);
475
476 let moved_records = self.move_entries(
477 tx,
478 right_iter,
479 left,
480 move_count,
481 &mut middle_key,
482 edge_child_pid,
483 |edge_pid: BTreePageID, _e: &Entry| edge_pid,
484 |_edge_pid: BTreePageID, e: &Entry| {
485 e.get_left_child()
486 },
487 |e: &Entry| e.get_left_child(),
488 )?;
489
490 for i in moved_records {
491 right.delete_key_and_left_child(i);
492 }
493 } else {
494 // The edge child of the destination page.
495 let edge_child_pid = right.get_first_child_pid();
496
497 let left_iter =
498 BTreeInternalPageIterator::new(&left).rev();
499
500 let moved_records = self.move_entries(
501 tx,
502 left_iter,
503 right,
504 move_count,
505 &mut middle_key,
506 edge_child_pid,
507 |_edge_pid: BTreePageID, e: &Entry| {
508 e.get_right_child()
509 },
510 |edge_pid: BTreePageID, _e: &Entry| edge_pid,
511 |e: &Entry| e.get_right_child(),
512 )?;
513
514 for i in moved_records {
515 left.delete_key_and_right_child(i);
516 }
517 }
518 }
519 // release the left and right page
520
521 parent_entry.set_key(middle_key);
522 parent_rc.wl().update_entry(&parent_entry);
523 Ok(())
524 }
525
526 /// # Arguments:
527 ///
528 /// * `middle_key`: The key between the left and right pages. This
529 /// key is always larger than children in the left page and
530 /// smaller than children in the right page. It should be
531 /// updated each time an entry is moved from the left/right page
532 /// to the otherside.
533 ///
534 /// * `edge_child_pid`: The edge child of the destination page.
535 ///
536 /// * `fn_get_edge_left_child`: A function to get the left child
537 /// of the new entry, the first argument is the edge child of
538 /// the destination page, the second argument is the current
539 /// entry of the source page (iterator).
540 ///
541 /// * `fn_get_edge_right_child`: Same as `fn_get_edge_left_child`,
542 /// but for the right child of the new entry.
543 ///
544 /// * `fn_get_moved_child`: A function to get the moved child
545 /// page, the argument is the current entry of the source page
546 /// (iterator).
547 ///
548 /// Return:
549 /// * The index of the moved entries in the source page.
550 fn move_entries(
551 &self,
552 tx: &Transaction,
553 src_iter: impl Iterator<Item = Entry>,
554 mut dest: impl DerefMut<Target = BTreeInternalPage>,
555 move_count: usize,
556 middle_key: &mut IntField,
557 mut edge_child_pid: BTreePageID,
558 fn_get_edge_left_child: impl Fn(
559 BTreePageID,
560 &Entry,
561 ) -> BTreePageID,
562 fn_get_edge_right_child: impl Fn(
563 BTreePageID,
564 &Entry,
565 ) -> BTreePageID,
566 fn_get_moved_child: impl Fn(&Entry) -> BTreePageID,
567 ) -> Result<Vec<usize>, SmallError> {
568 // Remember the entries for deletion later (cause we can't
569 // modify the page while iterating though it)
570 let mut moved_records = Vec::new();
571
572 for e in src_iter.take(move_count) {
573 // 1. delete the entry from the src page
574 moved_records.push(e.get_record_id());
575
576 // 2. insert new entry to dest page
577 let new_entry = Entry::new(
578 *middle_key,
579 &fn_get_edge_left_child(edge_child_pid, &e),
580 &fn_get_edge_right_child(edge_child_pid, &e),
581 );
582 dest.insert_entry(&new_entry)?;
583
584 // 3. update parent id for the moved child
585 self.set_parent_pid(
586 tx,
587 &fn_get_moved_child(&e),
588 &dest.get_pid(),
589 );
590
591 // 4. update key and edge child for the next iteration
592 *middle_key = e.get_key();
593 edge_child_pid = fn_get_moved_child(&e);
594 }
595 return Ok(moved_records);
596 }
597
598 /// Steal tuples from a sibling and copy them to the given page so
599 /// that both pages are at least half full. Update the
600 /// parent's entry so that the key matches the key field of
601 /// the first tuple in the right-hand page.
602 ///
603 /// # Arguments
604 ///
605 /// - page - the leaf page which is less than half full
606 /// - sibling - the sibling which has tuples to spare
607 /// - parent - the parent of the two leaf pages
608 /// - entry - the entry in the parent pointing to the two
609 /// leaf pages
610 /// - is_right_sibling - whether the sibling is a right-sibling
611 fn balancing_two_leaf_pages(
612 &self,
613 tx: &Transaction,
614 left_rc: Arc<RwLock<BTreeLeafPage>>,
615 right_rc: Arc<RwLock<BTreeLeafPage>>,
616 ) -> SmallResult {
617 let parent_rc = Unique::mut_page_cache()
618 .get_internal_page(
619 tx,
620 Permission::ReadWrite,
621 &left_rc.rl().get_parent_pid(),
622 )
623 .unwrap();
624 let mut entry = parent_rc
625 .rl()
626 .get_entry_by_children(
627 &left_rc.rl().get_pid(),
628 &right_rc.rl().get_pid(),
629 )
630 .unwrap();
631
632 let left_tuples = left_rc.rl().tuples_count();
633 let right_tuples = right_rc.rl().tuples_count();
634 if left_tuples + right_tuples
635 <= left_rc.rl().get_slots_count()
636 {
637 // if the two pages can be merged, merge them
638 return self.merge_leaf_page(
639 tx, left_rc, right_rc, parent_rc, &entry,
640 );
641 }
642
643 let move_count = (left_tuples + right_tuples) / 2
644 - cmp::min(left_tuples, right_tuples);
645 if move_count == 0 {
646 return self.merge_leaf_page(
647 tx, left_rc, right_rc, parent_rc, &entry,
648 );
649 }
650
651 let mut key = entry.get_key();
652
653 // hold left and right page
654 {
655 let mut left = left_rc.wl();
656 let mut right = right_rc.wl();
657
658 if left_tuples < right_tuples {
659 let iter = BTreeLeafPageIterator::new(&right);
660 let mut deleted_indexes = Vec::new();
661 for tuple in iter.take(move_count) {
662 left.insert_tuple(&tuple);
663 deleted_indexes.push(tuple.get_slot_number());
664 key = tuple.get_field(self.key_field);
665 }
666 for i in deleted_indexes {
667 right.delete_tuple(i);
668 }
669 } else {
670 let iter = BTreeLeafPageIterator::new(&left);
671 let mut deleted_indexes = Vec::new();
672 for tuple in iter.rev().take(move_count) {
673 right.insert_tuple(&tuple);
674 deleted_indexes.push(tuple.get_slot_number());
675 key = tuple.get_field(self.key_field);
676 }
677 for i in deleted_indexes {
678 left.delete_tuple(i);
679 }
680 }
681 }
682 // release left and right page
683
684 entry.set_key(key);
685 parent_rc.wl().update_entry(&entry);
686
687 Ok(())
688 }
689}