small_db/btree/table/
delete.rs

1use std::{
2    cmp,
3    io::Write,
4    ops::DerefMut,
5    sync::{Arc, RwLock},
6    usize,
7};
8
9use crate::{
10    btree::{
11        page::{
12            BTreeHeaderPage, BTreeInternalPage,
13            BTreeInternalPageIterator, BTreeLeafPage,
14            BTreeLeafPageIterator, BTreePage, BTreePageID, Entry,
15            PageCategory,
16        },
17        tuple::WrappedTuple,
18    },
19    concurrent_status::Permission,
20    error::SmallError,
21    field::IntField,
22    transaction::Transaction,
23    types::SmallResult,
24    utils::HandyRwLock,
25    BTreeTable, Unique,
26};
27
28/// delete-related methods
29impl BTreeTable {
30    /// Delete a tuple from this BTreeFile.
31    ///
32    /// May cause pages to merge or redistribute entries/tuples if the
33    /// pages become less than half full.
34    pub fn delete_tuple(
35        &self,
36        tx: &Transaction,
37        tuple: &WrappedTuple,
38    ) -> SmallResult {
39        let pid = tuple.get_pid();
40        let leaf_rc = Unique::mut_page_cache()
41            .get_leaf_page(tx, Permission::ReadWrite, &pid)
42            .unwrap();
43
44        // hold the leaf page
45        {
46            let mut leaf = leaf_rc.wl();
47            leaf.delete_tuple(tuple.get_slot_number());
48        }
49        // release the leaf page
50
51        if leaf_rc.rl().stable() {
52            return Ok(());
53        } else {
54            return self.handle_erratic_leaf_page(tx, leaf_rc);
55        }
56    }
57
58    /// Handle the case when a leaf page becomes less than half full
59    /// due to deletions.
60    ///
61    /// If one of its siblings has extra tuples, redistribute those
62    /// tuples. Otherwise merge with one of the siblings. Update
63    /// pointers as needed.
64    fn handle_erratic_leaf_page(
65        &self,
66        tx: &Transaction,
67        page_rc: Arc<RwLock<BTreeLeafPage>>,
68    ) -> SmallResult {
69        if page_rc.rl().get_parent_pid().category
70            == PageCategory::RootPointer
71        {
72            return Ok(());
73        }
74
75        let left_pid = page_rc.rl().get_left_pid();
76        let right_pid = page_rc.rl().get_right_pid();
77
78        if let Some(left_pid) = left_pid {
79            let left_rc = Unique::mut_page_cache()
80                .get_leaf_page(tx, Permission::ReadWrite, &left_pid)
81                .unwrap();
82            self.balancing_two_leaf_pages(tx, left_rc, page_rc)?;
83        } else if let Some(right_pid) = right_pid {
84            let right_rc = Unique::mut_page_cache()
85                .get_leaf_page(tx, Permission::ReadWrite, &right_pid)
86                .unwrap();
87            self.balancing_two_leaf_pages(tx, page_rc, right_rc)?;
88        } else {
89            return Err(SmallError::new(
90                "BTreeTable::handle_erratic_leaf_page no left or right sibling",
91            ));
92        };
93
94        return Ok(());
95    }
96
97    /// Handle the case when an internal page becomes less than half
98    /// full due to deletions.
99    ///
100    /// If one of its siblings has extra entries, redistribute those
101    /// entries. Otherwise merge with one of the siblings. Update
102    /// pointers as needed.
103    ///
104    /// # Arguments
105    ///
106    /// - page_rc - the erratic internal page to be handled
107    fn handle_erratic_internal_page(
108        &self,
109        tx: &Transaction,
110        page_rc: Arc<RwLock<BTreeInternalPage>>,
111    ) -> SmallResult {
112        if page_rc.rl().get_parent_pid().category
113            == PageCategory::RootPointer
114        {
115            return Ok(());
116        }
117
118        let left_pid = page_rc.rl().get_left_sibling_pid(tx);
119        let right_pid = page_rc.rl().get_right_sibling_pid(tx);
120        if let Some(left_pid) = left_pid {
121            let left_rc = Unique::mut_page_cache()
122                .get_internal_page(
123                    tx,
124                    Permission::ReadWrite,
125                    &left_pid,
126                )
127                .unwrap();
128            self.balancing_two_internal_pages(tx, left_rc, page_rc)?;
129        } else if let Some(right_pid) = right_pid {
130            let right_rc = Unique::mut_page_cache()
131                .get_internal_page(
132                    tx,
133                    Permission::ReadWrite,
134                    &right_pid,
135                )
136                .unwrap();
137            self.balancing_two_internal_pages(tx, page_rc, right_rc)?;
138        } else {
139            panic!("Cannot find the left/right sibling of the page");
140        }
141
142        Ok(())
143    }
144
145    fn set_parent_pid(
146        &self,
147        tx: &Transaction,
148        child_pid: &BTreePageID,
149        parent_pid: &BTreePageID,
150    ) {
151        match child_pid.category {
152            PageCategory::Leaf => {
153                let child_rc = Unique::mut_page_cache()
154                    .get_leaf_page(
155                        tx,
156                        Permission::ReadWrite,
157                        child_pid,
158                    )
159                    .unwrap();
160                child_rc.wl().set_parent_pid(&parent_pid);
161            }
162            PageCategory::Internal => {
163                let child_rc = Unique::mut_page_cache()
164                    .get_internal_page(
165                        tx,
166                        Permission::ReadOnly,
167                        child_pid,
168                    )
169                    .unwrap();
170                child_rc.wl().set_parent_pid(&parent_pid);
171            }
172            _ => panic!("Invalid page category"),
173        }
174    }
175
176    /// # Arguments
177    ///
178    /// - parent_entry - the entry in the parent corresponding to the
179    ///   left and right
180    fn merge_internal_page(
181        &self,
182        tx: &Transaction,
183        left_rc: Arc<RwLock<BTreeInternalPage>>,
184        right_rc: Arc<RwLock<BTreeInternalPage>>,
185        parent_rc: Arc<RwLock<BTreeInternalPage>>,
186        parent_entry: &Entry,
187    ) -> SmallResult {
188        // hold left_rc and right_rc
189        {
190            let mut left = left_rc.wl();
191            let mut right = right_rc.wl();
192
193            // stage 1: pull down the edge entry from parent and
194            // insert it into target page
195            let edge_entry = Entry::new(
196                parent_entry.get_key(),
197                &left.get_last_child_pid(),
198                &right.get_first_child_pid(),
199            );
200            self.set_parent_pid(
201                tx,
202                &right.get_first_child_pid(),
203                &left.get_pid(),
204            );
205            left.insert_entry(&edge_entry)?;
206
207            // stage 2: move the entries from the one page to the
208            // other
209            let mut deleted_indexes = Vec::new();
210            let iter = BTreeInternalPageIterator::new(&right);
211            for e in iter {
212                left.insert_entry(&e)?;
213                self.set_parent_pid(
214                    tx,
215                    &e.get_right_child(),
216                    &left.get_pid(),
217                );
218                deleted_indexes.push(e.get_record_id());
219            }
220            for i in deleted_indexes {
221                right.delete_key_and_right_child(i);
222            }
223
224            // stage 3: set the right as empty
225            self.set_empty_page(tx, &right.get_pid());
226        }
227        // release left_rc and right_rc
228
229        // stage 4: update the entry in parent which points to the
230        // left and right
231        self.delete_parent_entry(
232            tx,
233            left_rc,
234            parent_rc,
235            parent_entry,
236        )?;
237
238        Ok(())
239    }
240
241    /// # Arguments
242    ///
243    /// - entry - the entry in the parent corresponding to the
244    ///   left_child and right_child
245    fn merge_leaf_page(
246        &self,
247        tx: &Transaction,
248        left_rc: Arc<RwLock<BTreeLeafPage>>,
249        right_rc: Arc<RwLock<BTreeLeafPage>>,
250        parent_rc: Arc<RwLock<BTreeInternalPage>>,
251        entry: &Entry,
252    ) -> SmallResult {
253        // hold the left and right page
254        {
255            let mut left = left_rc.wl();
256            let mut right = right_rc.wl();
257
258            // stage 1: move the tuples from right to left
259            let mut it = BTreeLeafPageIterator::new(&right);
260            let mut deleted = Vec::new();
261            for t in it.by_ref() {
262                left.insert_tuple(&t);
263                deleted.push(t.get_slot_number());
264            }
265            for slot in deleted {
266                right.delete_tuple(slot);
267            }
268
269            // stage 2: update sibling pointers
270
271            // set the right pointer of the left page to the right
272            // page's right pointer
273            left.set_right_pid(right.get_right_pid());
274
275            // set the left pointer for the newer right page
276            if let Some(newer_right_pid) = right.get_right_pid() {
277                let newer_right_rc = Unique::mut_page_cache()
278                    .get_leaf_page(
279                        tx,
280                        Permission::ReadWrite,
281                        &newer_right_pid,
282                    )
283                    .unwrap();
284                newer_right_rc
285                    .wl()
286                    .set_left_pid(Some(left.get_pid()));
287            }
288
289            // stage 4: set the right page as empty
290            self.set_empty_page(tx, &right.get_pid());
291        }
292
293        // stage 5: release the left and right page
294        self.delete_parent_entry(tx, left_rc, parent_rc, entry)?;
295
296        Ok(())
297    }
298
299    /// Method to encapsulate the process of deleting an entry
300    /// (specifically the key and right child) from a parent node.
301    ///
302    /// If the parent becomes empty (no keys remaining), that
303    /// indicates that it was the root node and should be replaced
304    /// by its one remaining child.
305    ///
306    /// Otherwise, if it gets below minimum occupancy for non-root
307    /// internal nodes, it should steal from one of its siblings
308    /// or merge with a sibling.
309    ///
310    /// # Arguments
311    ///
312    /// - reserved_child    - the child reserved after the key and
313    ///   another child are deleted
314    /// - page              - the parent containing the entry to be
315    ///   deleted
316    /// - entry             - the entry to be deleted
317    /// - delete_left_child - which child of the entry should be
318    ///   deleted
319    fn delete_parent_entry<PAGE: BTreePage>(
320        &self,
321        tx: &Transaction,
322        left_rc: Arc<RwLock<PAGE>>,
323        parent_rc: Arc<RwLock<BTreeInternalPage>>,
324        entry: &Entry,
325    ) -> SmallResult {
326        // hold the parent and left page
327        {
328            let mut parent = parent_rc.wl();
329            let mut left = left_rc.wl();
330
331            // stage 1: delete the corresponding entry in the parent
332            // page
333            parent.delete_key_and_right_child(entry.get_record_id());
334
335            // stage 2: handle the parent page according to the
336            // following cases case 1: parent is empty,
337            // then the left child is now the new root
338            if parent.entries_count() == 0 {
339                let root_ptr_page_rc = self.get_root_ptr_page(tx);
340
341                // hold the root pointer page
342                {
343                    let mut root_ptr_page = root_ptr_page_rc.wl();
344                    left.set_parent_pid(&root_ptr_page.get_pid());
345                    root_ptr_page.set_root_pid(&left.get_pid());
346                }
347                // release the root pointer page
348
349                // release the page for reuse
350                self.set_empty_page(tx, &parent.get_pid());
351                return Ok(());
352            }
353
354            // case 2: parent is stable, return directly
355            if parent.stable() {
356                return Ok(());
357            }
358        }
359        // release the parent and left page
360
361        // case 3: parent is unstable (erratic), handle it
362        self.handle_erratic_internal_page(tx, parent_rc)?;
363        Ok(())
364    }
365
366    /// Mark a page in this BTreeTable as empty. Find the
367    /// corresponding header page (create it if needed), and mark
368    /// the corresponding slot in the header page as empty.
369    fn set_empty_page(&self, tx: &Transaction, pid: &BTreePageID) {
370        Unique::mut_page_cache().discard_page(pid);
371
372        let root_ptr_rc = self.get_root_ptr_page(tx);
373        let header_rc: Arc<RwLock<BTreeHeaderPage>>;
374
375        // let mut root_ptr = root_ptr_rc.wl();
376        match root_ptr_rc.rl().get_header_pid() {
377            Some(header_pid) => {
378                header_rc = Unique::mut_page_cache()
379                    .get_header_page(
380                        tx,
381                        Permission::ReadWrite,
382                        &header_pid,
383                    )
384                    .unwrap();
385            }
386            None => {
387                // if there are no header pages, create the first
388                // header page and update the header
389                // pointer in the BTreeRootPtrPage
390                header_rc = self.get_empty_header_page(tx);
391            }
392        }
393
394        root_ptr_rc.wl().set_header_pid(&header_rc.rl().get_pid());
395
396        // borrow of header_rc start here
397        {
398            let mut header = header_rc.wl();
399            let slot_index =
400                pid.page_index as usize % header.get_slots_count();
401            header.mark_slot_status(slot_index, false);
402        }
403        // borrow of header_rc end here
404    }
405
406    /// Balancing two internal pages according the situation:
407    ///
408    /// 1.  Merge the two pages if the count of entries in the two
409    /// pages is less than the maximum capacity of a single page.
410    ///
411    /// 2.  Otherwise, steal entries from the sibling and copy them to
412    /// the given page so that both pages are at least half full.
413    ///
414    /// Keys can be thought of as rotating through the parent entry,
415    /// so the original key in the parent is "pulled down" to the
416    /// erratic page, and the last key in the sibling page is
417    /// "pushed up" to the parent.  Update parent pointers as
418    /// needed.
419    fn balancing_two_internal_pages(
420        &self,
421        tx: &Transaction,
422        left_rc: Arc<RwLock<BTreeInternalPage>>,
423        right_rc: Arc<RwLock<BTreeInternalPage>>,
424    ) -> SmallResult {
425        let parent_rc = Unique::mut_page_cache()
426            .get_internal_page(
427                tx,
428                Permission::ReadWrite,
429                &left_rc.rl().get_parent_pid(),
430            )
431            .unwrap();
432        let mut parent_entry = parent_rc
433            .rl()
434            .get_entry_by_children(
435                &left_rc.rl().get_pid(),
436                &right_rc.rl().get_pid(),
437            )
438            .unwrap();
439
440        let left_children = left_rc.rl().children_count();
441        let right_children = right_rc.rl().children_count();
442        if left_children + right_children
443            <= left_rc.rl().get_children_capacity()
444        {
445            // if the two pages can be merged, merge them
446            return self.merge_internal_page(
447                tx,
448                left_rc,
449                right_rc,
450                parent_rc,
451                &parent_entry,
452            );
453        }
454
455        // if there aren't any entries to move, return immediately
456        let move_count = (left_children + right_children) / 2
457            - cmp::min(left_children, right_children);
458        if move_count == 0 {
459            return Ok(());
460        }
461
462        let mut middle_key = parent_entry.get_key();
463
464        // hold the left and right page
465        {
466            let mut left = left_rc.wl();
467            let mut right = right_rc.wl();
468
469            if left_children < right_children {
470                // The edge child of the destination page.
471                let edge_child_pid = left.get_last_child_pid();
472
473                let right_iter =
474                    BTreeInternalPageIterator::new(&right);
475
476                let moved_records = self.move_entries(
477                    tx,
478                    right_iter,
479                    left,
480                    move_count,
481                    &mut middle_key,
482                    edge_child_pid,
483                    |edge_pid: BTreePageID, _e: &Entry| edge_pid,
484                    |_edge_pid: BTreePageID, e: &Entry| {
485                        e.get_left_child()
486                    },
487                    |e: &Entry| e.get_left_child(),
488                )?;
489
490                for i in moved_records {
491                    right.delete_key_and_left_child(i);
492                }
493            } else {
494                // The edge child of the destination page.
495                let edge_child_pid = right.get_first_child_pid();
496
497                let left_iter =
498                    BTreeInternalPageIterator::new(&left).rev();
499
500                let moved_records = self.move_entries(
501                    tx,
502                    left_iter,
503                    right,
504                    move_count,
505                    &mut middle_key,
506                    edge_child_pid,
507                    |_edge_pid: BTreePageID, e: &Entry| {
508                        e.get_right_child()
509                    },
510                    |edge_pid: BTreePageID, _e: &Entry| edge_pid,
511                    |e: &Entry| e.get_right_child(),
512                )?;
513
514                for i in moved_records {
515                    left.delete_key_and_right_child(i);
516                }
517            }
518        }
519        // release the left and right page
520
521        parent_entry.set_key(middle_key);
522        parent_rc.wl().update_entry(&parent_entry);
523        Ok(())
524    }
525
526    /// # Arguments:
527    ///
528    /// * `middle_key`: The key between the left and right pages. This
529    ///   key is always larger than children in the left page and
530    ///   smaller than children in the right page. It should be
531    ///   updated each time an entry is moved from the left/right page
532    ///   to the otherside.
533    ///
534    /// * `edge_child_pid`: The edge child of the destination page.
535    ///
536    /// * `fn_get_edge_left_child`: A function to get the left child
537    ///   of the new entry, the first argument is the edge child of
538    ///   the destination page, the second argument is the current
539    ///   entry of the source page (iterator).
540    ///
541    /// * `fn_get_edge_right_child`: Same as `fn_get_edge_left_child`,
542    ///   but for the right child of the new entry.
543    ///
544    /// * `fn_get_moved_child`: A function to get the moved child
545    ///   page, the argument is the current entry of the source page
546    ///   (iterator).
547    ///
548    /// Return:
549    /// * The index of the moved entries in the source page.
550    fn move_entries(
551        &self,
552        tx: &Transaction,
553        src_iter: impl Iterator<Item = Entry>,
554        mut dest: impl DerefMut<Target = BTreeInternalPage>,
555        move_count: usize,
556        middle_key: &mut IntField,
557        mut edge_child_pid: BTreePageID,
558        fn_get_edge_left_child: impl Fn(
559            BTreePageID,
560            &Entry,
561        ) -> BTreePageID,
562        fn_get_edge_right_child: impl Fn(
563            BTreePageID,
564            &Entry,
565        ) -> BTreePageID,
566        fn_get_moved_child: impl Fn(&Entry) -> BTreePageID,
567    ) -> Result<Vec<usize>, SmallError> {
568        // Remember the entries for deletion later (cause we can't
569        // modify the page while iterating though it)
570        let mut moved_records = Vec::new();
571
572        for e in src_iter.take(move_count) {
573            // 1. delete the entry from the src page
574            moved_records.push(e.get_record_id());
575
576            // 2. insert new entry to dest page
577            let new_entry = Entry::new(
578                *middle_key,
579                &fn_get_edge_left_child(edge_child_pid, &e),
580                &fn_get_edge_right_child(edge_child_pid, &e),
581            );
582            dest.insert_entry(&new_entry)?;
583
584            // 3. update parent id for the moved child
585            self.set_parent_pid(
586                tx,
587                &fn_get_moved_child(&e),
588                &dest.get_pid(),
589            );
590
591            // 4. update key and edge child for the next iteration
592            *middle_key = e.get_key();
593            edge_child_pid = fn_get_moved_child(&e);
594        }
595        return Ok(moved_records);
596    }
597
598    /// Steal tuples from a sibling and copy them to the given page so
599    /// that both pages are at least half full.  Update the
600    /// parent's entry so that the key matches the key field of
601    /// the first tuple in the right-hand page.
602    ///
603    /// # Arguments
604    ///
605    /// - page           - the leaf page which is less than half full
606    /// - sibling        - the sibling which has tuples to spare
607    /// - parent         - the parent of the two leaf pages
608    /// - entry          - the entry in the parent pointing to the two
609    ///   leaf pages
610    /// - is_right_sibling - whether the sibling is a right-sibling
611    fn balancing_two_leaf_pages(
612        &self,
613        tx: &Transaction,
614        left_rc: Arc<RwLock<BTreeLeafPage>>,
615        right_rc: Arc<RwLock<BTreeLeafPage>>,
616    ) -> SmallResult {
617        let parent_rc = Unique::mut_page_cache()
618            .get_internal_page(
619                tx,
620                Permission::ReadWrite,
621                &left_rc.rl().get_parent_pid(),
622            )
623            .unwrap();
624        let mut entry = parent_rc
625            .rl()
626            .get_entry_by_children(
627                &left_rc.rl().get_pid(),
628                &right_rc.rl().get_pid(),
629            )
630            .unwrap();
631
632        let left_tuples = left_rc.rl().tuples_count();
633        let right_tuples = right_rc.rl().tuples_count();
634        if left_tuples + right_tuples
635            <= left_rc.rl().get_slots_count()
636        {
637            // if the two pages can be merged, merge them
638            return self.merge_leaf_page(
639                tx, left_rc, right_rc, parent_rc, &entry,
640            );
641        }
642
643        let move_count = (left_tuples + right_tuples) / 2
644            - cmp::min(left_tuples, right_tuples);
645        if move_count == 0 {
646            return self.merge_leaf_page(
647                tx, left_rc, right_rc, parent_rc, &entry,
648            );
649        }
650
651        let mut key = entry.get_key();
652
653        // hold left and right page
654        {
655            let mut left = left_rc.wl();
656            let mut right = right_rc.wl();
657
658            if left_tuples < right_tuples {
659                let iter = BTreeLeafPageIterator::new(&right);
660                let mut deleted_indexes = Vec::new();
661                for tuple in iter.take(move_count) {
662                    left.insert_tuple(&tuple);
663                    deleted_indexes.push(tuple.get_slot_number());
664                    key = tuple.get_field(self.key_field);
665                }
666                for i in deleted_indexes {
667                    right.delete_tuple(i);
668                }
669            } else {
670                let iter = BTreeLeafPageIterator::new(&left);
671                let mut deleted_indexes = Vec::new();
672                for tuple in iter.rev().take(move_count) {
673                    right.insert_tuple(&tuple);
674                    deleted_indexes.push(tuple.get_slot_number());
675                    key = tuple.get_field(self.key_field);
676                }
677                for i in deleted_indexes {
678                    left.delete_tuple(i);
679                }
680            }
681        }
682        // release left and right page
683
684        entry.set_key(key);
685        parent_rc.wl().update_entry(&entry);
686
687        Ok(())
688    }
689}