// simple_db_rust/btree/page.rs

1use std::{cell::RefCell, convert::TryInto, fmt, rc::Rc};
2
3use bit_vec::BitVec;
4use log::{debug, info};
5
6use crate::{field::get_type_length, Catalog};
7
8use super::{
9    buffer_pool::BufferPool,
10    tuple::{Tuple, TupleScheme},
11};
12
13use super::consts::INDEX_SIZE;
14
/// The role a page plays in the B-tree file.
#[derive(PartialEq, Copy, Clone, Eq, Hash)]
pub enum PageCategory {
    // Always at the beginning of the file; stores the location of the
    // root page (see `BTreeRootPointerPage`).
    RootPointer,
    // Interior node: separator keys plus child-page pointers.
    Internal,
    // Leaf node: holds the actual tuples.
    Leaf,
    // Bookkeeping page. NOTE(review): header-page semantics are not
    // visible in this file — confirm against the file-management code.
    Header,
}
22
23impl fmt::Display for PageCategory {
24    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
25        match self {
26            PageCategory::RootPointer => {
27                write!(f, "ROOT_POINTER")
28            }
29            PageCategory::Internal => {
30                write!(f, "INTERNAL")
31            }
32            PageCategory::Leaf => {
33                write!(f, "LEAF")
34            }
35            PageCategory::Header => {
36                write!(f, "HEADER")
37            }
38        }
39    }
40}
41
42impl fmt::Debug for PageCategory {
43    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
44        write!(f, "{}", self)
45    }
46}
47
#[test]
fn test_page_category() {
    // Distinct variants must compare unequal. (The original test printed
    // "error"/"ok" instead of asserting, so failures went unnoticed.)
    assert_ne!(PageCategory::Header, PageCategory::Leaf);
    assert_ne!(PageCategory::Leaf, PageCategory::RootPointer);

    // Matching selects the correct arm.
    let c = PageCategory::Header;
    assert!(matches!(c, PageCategory::Header));
    assert_eq!(format!("{}", c), "HEADER");

    // `Display` and `Debug` render the same upper-case name.
    let c = PageCategory::Internal;
    assert_eq!(format!("{}", c), "INTERNAL");
    assert_eq!(format!("{:?}", c), "INTERNAL");
}
74
/// State shared by every page type: its own id and its parent's id.
/// Concrete page types embed this struct and expose it through `Deref`.
pub struct BTreeBasePage {
    // Identifier of this page.
    pid: BTreePageID,

    // Identifier of the parent page; `BTreePageID::empty()` until a
    // parent is assigned via `set_parent_pid`.
    parent_pid: BTreePageID,
}
80
81impl BTreeBasePage {
82    pub fn get_pid(&self) -> BTreePageID {
83        self.pid
84    }
85
86    pub fn get_parent_pid(&self) -> BTreePageID {
87        self.parent_pid
88    }
89
90    pub fn set_parent_pid(&mut self, pid: &BTreePageID) {
91        self.parent_pid = pid.clone();
92    }
93
94    pub fn empty_page_data() -> Vec<u8> {
95        let data: Vec<u8> = vec![0; BufferPool::get_page_size()];
96        data
97    }
98}
99
/// A leaf page: a slot-occupancy bitmap followed by a fixed number of
/// tuple slots.
pub struct BTreeLeafPage {
    page: BTreeBasePage,

    // total number of slots on this page, occupied or not
    pub slot_count: usize,

    // indicate slots' status: true means occupied, false means empty
    header: BitVec<u32>,

    // all tuples (include empty tuples)
    tuples: Vec<Tuple>,

    pub tuple_scheme: TupleScheme,

    // page index of the right sibling; 0 is the "no sibling" sentinel
    // (see `get_right_sibling_pid`)
    right_sibling_id: usize,

    // index of the key field within the tuple scheme
    key_field: usize,
}
117
// `Deref`/`DerefMut` give leaf pages access to the shared base-page API
// (pid / parent-pid accessors).
// NOTE(review): Deref-based pseudo-inheritance is generally discouraged
// in Rust, but it is the established pattern throughout this file.
impl std::ops::Deref for BTreeLeafPage {
    type Target = BTreeBasePage;
    fn deref(&self) -> &Self::Target {
        &self.page
    }
}

impl std::ops::DerefMut for BTreeLeafPage {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.page
    }
}
130
131impl BTreeLeafPage {
132    pub fn new(
133        page_id: &BTreePageID,
134        bytes: Vec<u8>,
135        tuple_scheme: &TupleScheme,
136        key_field: usize,
137    ) -> Self {
138        let slot_count = Self::get_max_tuples(&tuple_scheme);
139        let header_size = Self::get_header_size(slot_count) as usize;
140
141        // init tuples
142        let mut tuples = Vec::new();
143        for i in 0..slot_count {
144            let start = header_size + i * tuple_scheme.get_size();
145            let end = start + tuple_scheme.get_size();
146            let t = Tuple::new(tuple_scheme.clone(), &bytes[start..end]);
147            tuples.push(t);
148        }
149
150        Self {
151            page: BTreeBasePage {
152                pid: page_id.clone(),
153                parent_pid: BTreePageID::empty(),
154            },
155            slot_count,
156            header: BitVec::from_bytes(&bytes[..header_size]),
157            tuples,
158            tuple_scheme: tuple_scheme.clone(),
159            right_sibling_id: 0,
160            key_field,
161        }
162    }
163
164    pub fn set_right_sibling_pid(&mut self, pid: Option<BTreePageID>) {
165        match pid {
166            Some(pid) => {
167                self.right_sibling_id = pid.page_index;
168            }
169            None => {}
170        }
171    }
172
173    pub fn get_right_sibling_pid(&self) -> Option<BTreePageID> {
174        if self.right_sibling_id == 0 {
175            return None;
176        } else {
177            return Some(BTreePageID::new(
178                PageCategory::Leaf,
179                self.pid.table_id,
180                self.right_sibling_id,
181            ));
182        }
183    }
184
185    /**
186    Retrieve the maximum number of tuples this page can hold.
187    */
188    pub fn get_max_tuples(scheme: &TupleScheme) -> usize {
189        let bits_per_tuple_including_header = scheme.get_size() * 8 + 1;
190        // extraBits are: left sibling pointer, right sibling pointer, parent
191        // pointer
192        let index_size: usize = 4;
193        let extra_bits = 3 * index_size * 8;
194        (BufferPool::get_page_size() * 8 - extra_bits)
195            / bits_per_tuple_including_header
196    }
197
198    pub fn empty_slots_count(&self) -> usize {
199        let mut count = 0;
200        for i in 0..self.slot_count {
201            if !self.is_slot_used(i) {
202                count += 1;
203            }
204        }
205        count
206    }
207
208    /// Returns the number of tuples currently stored on this page
209    pub fn tuples_count(&self) -> usize {
210        self.slot_count - self.empty_slots_count()
211    }
212
213    // Computes the number of bytes in the header of
214    // a page in a BTreeFile with each tuple occupying
215    // tupleSize bytes
216    pub fn get_header_size(slot_count: usize) -> usize {
217        slot_count / 8 + 1
218    }
219
220    /**
221    Adds the specified tuple to the page such that all records remain in
222    sorted order; the tuple should be updated to reflect
223    that it is now stored on this page.
224    tuple: The tuple to add.
225    */
226    pub fn insert_tuple(&mut self, tuple: &Tuple) {
227        // find the first empty slot
228        let mut first_empty_slot: i32 = 0;
229        for i in 0..self.slot_count {
230            if !self.is_slot_used(i) {
231                first_empty_slot = i as i32;
232                // debug!("first emply slot: {}", first_empty_slot);
233                break;
234            }
235        }
236
237        // Find the last key less than or equal to the key being inserted.
238        //
239        // -1 indicate there is no such key less than tuple.key, so the tuple
240        // should be inserted in slot 0 (-1 + 1).
241        let mut last_less_slot: i32 = -1;
242        for i in 0..self.slot_count {
243            if self.is_slot_used(i) {
244                if self.tuples[i].get_field(self.key_field)
245                    < tuple.get_field(self.key_field)
246                {
247                    last_less_slot = i as i32;
248                } else {
249                    break;
250                }
251            }
252        }
253
254        // shift records back or forward to fill empty slot and make room for
255        // new record while keeping records in sorted order
256        let good_slot: usize;
257        if first_empty_slot < last_less_slot {
258            for i in first_empty_slot..last_less_slot {
259                self.move_tuple((i + 1) as usize, i as usize);
260            }
261            good_slot = last_less_slot as usize;
262        } else {
263            for i in (last_less_slot + 1..first_empty_slot).rev() {
264                self.move_tuple(i as usize, (i + 1) as usize);
265            }
266            good_slot = (last_less_slot + 1) as usize;
267        }
268
269        // insert new record into the correct spot in sorted order
270        self.tuples[good_slot] = tuple.clone();
271        self.mark_slot_status(good_slot, true);
272
273        debug!(
274            "good slot: {}, first: {}, last: {}",
275            good_slot, first_empty_slot, last_less_slot
276        );
277    }
278
279    // Move a tuple from one slot to another slot, destination must be empty
280    fn move_tuple(&mut self, from: usize, to: usize) {
281        self.tuples[to] = self.tuples[from].clone();
282        self.mark_slot_status(to, true);
283        self.mark_slot_status(from, false);
284    }
285
286    pub fn get_tuple(&self, slot_index: usize) -> Option<Tuple> {
287        if self.is_slot_used(slot_index) {
288            return Some(self.tuples[slot_index].clone());
289        }
290        None
291    }
292
293    pub fn delete_tuple(&mut self, slot_index: &usize) {
294        self.mark_slot_status(*slot_index, false);
295    }
296
297    /**
298    Returns true if associated slot on this page is filled.
299    */
300    pub fn is_slot_used(&self, slot_index: usize) -> bool {
301        self.header[slot_index]
302    }
303
304    /*
305    mark the slot as empty/filled.
306    */
307    pub fn mark_slot_status(&mut self, slot_index: usize, used: bool) {
308        self.header.set(slot_index, used);
309    }
310}
311
/// Forward iterator over the occupied tuples of a leaf page.
/// Holds shared ownership (`Rc<RefCell<..>>`) and borrows the page on
/// each `next` call.
pub struct BTreeLeafPageIterator {
    page: Rc<RefCell<BTreeLeafPage>>,
    // next slot to inspect, starting at 0
    cursor: usize,
}

impl BTreeLeafPageIterator {
    pub fn new(page: Rc<RefCell<BTreeLeafPage>>) -> Self {
        Self { page, cursor: 0 }
    }
}
322
323impl Iterator for BTreeLeafPageIterator {
324    type Item = Tuple;
325
326    fn next(&mut self) -> Option<Self::Item> {
327        let page = (*self.page).borrow();
328        while self.cursor < page.slot_count {
329            if page.is_slot_used(self.cursor) {
330                let tuple = page.tuples[self.cursor].clone();
331                self.cursor += 1;
332                return Some(tuple);
333            } else {
334                self.cursor += 1;
335            }
336        }
337
338        None
339    }
340}
341
/// Reverse iterator over the occupied tuples of a leaf page.
pub struct BTreeLeafPageReverseIterator<'page> {
    page: &'page BTreeLeafPage,
    // cursor into the slots; starts at slot_count (one past the last slot)
    cursor: usize,
}

impl<'page> BTreeLeafPageReverseIterator<'page> {
    pub fn new(page: &'page BTreeLeafPage) -> Self {
        Self {
            page,
            cursor: page.slot_count,
        }
    }
}
355
356impl<'page> Iterator for BTreeLeafPageReverseIterator<'_> {
357    type Item = Tuple;
358
359    fn next(&mut self) -> Option<Self::Item> {
360        loop {
361            if self.page.is_slot_used(self.cursor) {
362                let tuple = self.page.tuples[self.cursor].clone();
363                self.cursor -= 1;
364                return Some(tuple);
365            } else if self.cursor == 0 {
366                return None;
367            } else {
368                self.cursor -= 1;
369            }
370        }
371    }
372}
373
// Why we need both BTreeRootPointerPage and BTreeRootPage?
// Because as the tree rebalances (growth, shrinking), the location
// of the root page will change. So we need the BTreeRootPointerPage,
// which is always placed at the beginning of the database file
// and points to the root page. So we can find the location of the
// root page easily.
pub struct BTreeRootPointerPage {
    base: BTreeBasePage,

    // id of the current root page of the tree
    root_pid: BTreePageID,
}
385
// `Deref`/`DerefMut` expose the shared base-page API, matching the
// pattern used by the other page types in this file.
impl std::ops::Deref for BTreeRootPointerPage {
    type Target = BTreeBasePage;
    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

impl std::ops::DerefMut for BTreeRootPointerPage {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.base
    }
}
398
399impl BTreeRootPointerPage {
400    pub fn new(pid: &BTreePageID, bytes: Vec<u8>) -> Self {
401        let root_page_index =
402            i32::from_le_bytes(bytes[0..4].try_into().unwrap()) as usize;
403        let root_pid = BTreePageID {
404            category: PageCategory::Leaf,
405            page_index: root_page_index,
406
407            // TODO: set table id
408            table_id: 0,
409        };
410        Self {
411            base: BTreeBasePage {
412                pid: pid.clone(),
413                parent_pid: BTreePageID::empty(),
414            },
415
416            root_pid,
417        }
418    }
419
420    pub fn get_root_pid(&self) -> BTreePageID {
421        self.root_pid
422    }
423
424    pub fn set_root_pid(&mut self, pid: &BTreePageID) {
425        self.root_pid = *pid;
426    }
427}
428
// PageID identifies a unique page, and contains the
// necessary metadata
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct BTreePageID {
    // category indicates the category of the page
    pub category: PageCategory,

    // page_index represents the position of the page in
    // the table, start from 0
    pub page_index: usize,

    // id of the table this page belongs to
    pub table_id: i32,
}
442
443impl fmt::Display for BTreePageID {
444    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
445        write!(
446            f,
447            "<BTreePageID, catagory: {}, page_index: {}, table_id: {}>",
448            self.category, self.page_index, self.table_id,
449        )
450    }
451}
452
453impl fmt::Debug for BTreePageID {
454    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
455        write!(f, "{}", self)
456    }
457}
458
459impl BTreePageID {
460    pub fn new(
461        category: PageCategory,
462        table_id: i32,
463        page_index: usize,
464    ) -> Self {
465        Self {
466            category,
467            page_index,
468            table_id,
469        }
470    }
471
472    pub fn empty() -> Self {
473        Self {
474            category: PageCategory::RootPointer,
475            page_index: 0,
476            table_id: 0,
477        }
478    }
479
480    pub fn get_table_id(&self) -> &i32 {
481        &self.table_id
482    }
483}
484
/// An internal (non-leaf) page: separator keys plus child-page ids.
///
/// Slot 0 carries only a child pointer (its key is never used); slots
/// 1..slot_count hold key/right-child pairs. For the entry in slot i,
/// the left child is `children[i - 1]` and the right child is
/// `children[i]` (see `get_entry`).
pub struct BTreeInternalPage {
    page: BTreeBasePage,

    pub keys: Vec<i32>,
    pub children: Vec<BTreePageID>,

    // total number of slots, i.e. max entries + 1
    slot_count: usize,

    // header bytes; bit i marks slot i as used
    header: BitVec<u32>,

    tuple_scheme: TupleScheme,

    // index of the key field within the tuple scheme
    key_field: usize,
}
500
// `Deref`/`DerefMut` expose the shared base-page API, matching the
// pattern used by the other page types in this file.
impl std::ops::Deref for BTreeInternalPage {
    type Target = BTreeBasePage;
    fn deref(&self) -> &Self::Target {
        &self.page
    }
}

impl std::ops::DerefMut for BTreeInternalPage {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.page
    }
}
513
514impl BTreeInternalPage {
515    pub fn new(
516        page_id: &BTreePageID,
517        bytes: Vec<u8>,
518        tuple_scheme: &TupleScheme,
519        key_field: usize,
520    ) -> Self {
521        let key_size =
522            get_type_length(tuple_scheme.fields[key_field].field_type);
523        let slot_count = Self::get_max_entries(key_size) + 1;
524        let header_size = Self::get_header_size(slot_count) as usize;
525
526        let mut keys: Vec<i32> = Vec::new();
527        let mut children: Vec<BTreePageID> = Vec::new();
528        keys.resize(slot_count, 0);
529        children.resize(slot_count, BTreePageID::new(PageCategory::Leaf, 0, 0));
530
531        Self {
532            page: BTreeBasePage {
533                pid: page_id.clone(),
534                parent_pid: BTreePageID::empty(),
535            },
536            keys,
537            children,
538            slot_count,
539            header: BitVec::from_bytes(&bytes[..header_size]),
540            tuple_scheme: tuple_scheme.clone(),
541            key_field,
542        }
543    }
544
545    pub fn dig(&self) {
546        info!("page id: {}, parent pid: {}", self.pid, self.parent_pid);
547        info!("empty slot count: {}", self.empty_slots_count());
548        info!("keys: {:?}", self.keys);
549        info!("children: {:?}", self.children);
550        let it = BTreeInternalPageIterator::new(self);
551        for (i, e) in it.enumerate() {
552            info!("{}: {}", i, e);
553        }
554    }
555
556    fn get_header_size(max_entries_count: usize) -> usize {
557        // +1 for children
558        let slots = max_entries_count + 1;
559        slots / 8 + 1
560    }
561
562    /**
563    Retrieve the maximum number of entries this page can hold. (The number of keys)
564    */
565    pub fn get_max_entries(key_size: usize) -> usize {
566        let bits_per_entry_including_header = key_size * 8 + INDEX_SIZE * 8 + 1;
567        /*
568        extraBits are: one parent pointer, 1 byte for child page category,
569        one extra child pointer (node with m entries has m+1 pointers to
570        children),
571        1 bit for extra header (why?)
572        */
573        let extra_bits = 2 * INDEX_SIZE * 8 + 8;
574        let entries_per_page = (BufferPool::get_page_size() * 8 - extra_bits)
575            / bits_per_entry_including_header; //round down
576        return entries_per_page;
577    }
578
579    pub fn get_page_id(&self) -> BTreePageID {
580        self.pid
581    }
582
583    pub fn get_entry(&self, index: usize) -> Option<Entry> {
584        if self.is_slot_used(index) {
585            Some(Entry::new(
586                self.keys[index],
587                &self.children[index - 1],
588                &self.children[index],
589            ))
590        } else {
591            None
592        }
593    }
594
595    pub fn empty_slots_count(&self) -> usize {
596        let mut count = 0;
597        // start from 1 because the first key slot is not used
598        // since a node with m keys has m+1 pointers
599        for i in 1..self.slot_count {
600            if !self.is_slot_used(i) {
601                count += 1
602            }
603        }
604        count
605    }
606
607    pub fn entries_count(&self) -> usize {
608        self.slot_count - self.empty_slots_count() - 1
609    }
610
611    pub fn delete_entry(&mut self, index: usize) {
612        self.mark_slot_status(index, false);
613    }
614
615    /**
616    Returns true if associated slot on this page is filled.
617    */
618    pub fn is_slot_used(&self, slot_index: usize) -> bool {
619        self.header[slot_index]
620    }
621
622    pub fn insert_entry(&mut self, e: &Entry) {
623        // if this is the first entry, add it and return
624        if self.empty_slots_count() == Self::get_max_entries(4) {
625            self.children[0] = e.get_left_child();
626            self.children[1] = e.get_right_child();
627            self.keys[1] = e.get_key();
628            self.mark_slot_status(0, true);
629            self.mark_slot_status(1, true);
630            return;
631        }
632
633        // find the first empty slot, start from 1
634        let mut empty_slot: i32 = -1;
635        for i in 0..self.slot_count {
636            if !self.is_slot_used(i) {
637                empty_slot = i as i32;
638                break;
639            }
640        }
641
642        // if there is no empty slot, return
643        if empty_slot == -1 {
644            panic!("no empty slot");
645        }
646
647        // find the child pointer matching the left or right child in this entry
648        let mut less_or_eq_slot = -1;
649        for i in 0..self.slot_count {
650            if !self.is_slot_used(i) {
651                continue;
652            }
653
654            if self.children[i] == e.get_left_child() {
655                // gotcha
656                less_or_eq_slot = i as i32;
657
658                // we not break here, but break on the next iteration
659                // to validate the keys is in order
660                continue;
661            }
662
663            if self.children[i] == e.get_right_child() {
664                // gotcha
665                less_or_eq_slot = i as i32;
666
667                // update right child of current entry
668                self.children[i] = e.get_left_child();
669
670                // we not break here, but break on the next iteration
671                // to validate the keys is in order
672                continue;
673            }
674
675            // validate that the next key is greater than or equal to the one we
676            // are inserting
677            if less_or_eq_slot != -1 {
678                if self.keys[i] < e.get_key() {
679                    panic!("key is not in order");
680                }
681                break;
682            }
683        }
684
685        if less_or_eq_slot == -1 {
686            info!("you are try to insert: {}", e);
687            info!("page id: {}", self.pid);
688            panic!("no less or equal slot",);
689        }
690
691        // shift entries back or forward to fill empty slot and make room for
692        // new entry while keeping entries in sorted order
693        let good_slot: i32;
694        if empty_slot < less_or_eq_slot {
695            for i in empty_slot..less_or_eq_slot {
696                self.move_entry((i + 1) as usize, i as usize);
697            }
698            good_slot = less_or_eq_slot
699        } else {
700            for i in (less_or_eq_slot + 1..empty_slot).rev() {
701                self.move_entry(i as usize, i as usize + 1);
702            }
703            good_slot = less_or_eq_slot + 1
704        }
705
706        self.keys[good_slot as usize] = e.get_key();
707        self.children[good_slot as usize] = e.get_right_child();
708        self.mark_slot_status(good_slot as usize, true);
709    }
710
711    fn move_entry(&mut self, from: usize, to: usize) {
712        if self.is_slot_used(from) && !self.is_slot_used(to) {
713            self.keys[to] = self.keys[from];
714            self.children[to] = self.children[from];
715            self.children[to - 1] = self.children[from - 1];
716            self.mark_slot_status(from, false);
717            self.mark_slot_status(to, true);
718        } else {
719            panic!("move_entry: invalid slot, from: {}, to: {}", from, to);
720        }
721    }
722
723    fn mark_slot_status(&mut self, slot_index: usize, used: bool) {
724        self.header.set(slot_index, used);
725    }
726}
727
/*
All of the entries or tuples in the left child page should be less than or equal to
the key, and all of the entries or tuples in the right child page should be greater
than or equal to the key.
*/
#[derive(Clone, Copy)]
pub struct Entry {
    // separator key
    key: i32,
    left: BTreePageID,
    right: BTreePageID,

    // record position in the page
    record_id: usize,
}
742
743impl Entry {
744    pub fn new(key: i32, left: &BTreePageID, right: &BTreePageID) -> Self {
745        Self {
746            key,
747            left: *left,
748            right: *right,
749
750            record_id: 0,
751        }
752    }
753
754    pub fn set_record_id(&mut self, record_id: usize) {
755        self.record_id = record_id;
756    }
757
758    pub fn get_record_id(&self) -> usize {
759        self.record_id
760    }
761
762    pub fn get_key(&self) -> i32 {
763        self.key
764    }
765
766    pub fn get_left_child(&self) -> BTreePageID {
767        self.left
768    }
769
770    pub fn get_right_child(&self) -> BTreePageID {
771        self.right
772    }
773}
774
impl fmt::Display for Entry {
    // Renders as "(key, left_pid, right_pid)".
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "({}, {}, {})", self.key, self.left, self.right)
    }
}
780
/// Forward iterator over the used entries of an internal page
/// (slots 1..slot_count).
pub struct BTreeInternalPageIterator<'page> {
    page: &'page BTreeInternalPage,
    // last visited slot; incremented before each read
    cursor: usize,
}

impl<'page> BTreeInternalPageIterator<'page> {
    pub fn new(page: &'page BTreeInternalPage) -> Self {
        Self { page, cursor: 0 }
    }
}
791
792impl Iterator for BTreeInternalPageIterator<'_> {
793    type Item = Entry;
794
795    fn next(&mut self) -> Option<Self::Item> {
796        loop {
797            self.cursor += 1;
798            if self.cursor >= self.page.slot_count {
799                return None;
800            }
801
802            if !self.page.is_slot_used(self.cursor) {
803                continue;
804            }
805            return Some(Entry::new(
806                self.page.keys[self.cursor],
807                &self.page.children[self.cursor - 1],
808                &self.page.children[self.cursor],
809            ));
810        }
811    }
812}
813
/// Reverse iterator over the used entries of an internal page.
pub struct BTreeInternalPageReverseIterator<'page> {
    page: &'page BTreeInternalPage,
    // cursor into the slots; starts at slot_count (one past the last slot)
    cursor: usize,
}

impl<'page> BTreeInternalPageReverseIterator<'page> {
    pub fn new(page: &'page BTreeInternalPage) -> Self {
        Self {
            page,
            cursor: page.slot_count,
        }
    }
}
827
828impl Iterator for BTreeInternalPageReverseIterator<'_> {
829    type Item = Entry;
830
831    fn next(&mut self) -> Option<Self::Item> {
832        loop {
833            self.cursor -= 1;
834
835            // entries start from 1
836            if self.cursor < 1 {
837                return None;
838            }
839
840            if !self.page.is_slot_used(self.cursor) {
841                continue;
842            }
843            let mut e = Entry::new(
844                self.page.keys[self.cursor],
845                &self.page.children[self.cursor - 1],
846                &self.page.children[self.cursor],
847            );
848            e.set_record_id(self.cursor);
849            return Some(e);
850        }
851    }
852}
853
854pub fn empty_page_data() -> Vec<u8> {
855    let data: Vec<u8> = vec![0; BufferPool::get_page_size()];
856    data
857}