1use std::{cell::RefCell, mem::size_of, rc::Rc};
2
3use crate::{
4 bucket::{BucketMeta, InnerBucket, META_SIZE},
5 bytes::Bytes,
6 errors::Result,
7 freelist::TxFreelist,
8 page::{BranchElement, LeafElement, Page, PageID, PageType},
9};
10
11pub(crate) type NodeID = u64;
12
13const HEADER_SIZE: u64 = size_of::<Page>() as u64;
14const LEAF_SIZE: u64 = size_of::<LeafElement>() as u64;
15const BRANCH_SIZE: u64 = size_of::<BranchElement>() as u64;
16const MIN_KEYS_PER_NODE: usize = 2;
17const FILL_PERCENT: f32 = 0.5;
18
19pub(crate) struct Node<'n> {
20 pub(crate) id: NodeID,
21 pub(crate) page_id: PageID,
22 pub(crate) num_pages: u64,
23 pub(crate) children: Vec<NodeID>,
24 pub(crate) data: NodeData<'n>,
25 pub(crate) deleted: bool,
26 pub(crate) original_key: Option<Bytes<'n>>,
27 pub(crate) parent: Option<u64>,
28 pagesize: u64,
29 spilled: bool,
30}
31
32impl<'n> Node<'n> {
33 pub(crate) fn new(id: NodeID, t: PageType, pagesize: u64) -> Node<'n> {
36 let data: NodeData = match t {
37 Page::TYPE_BRANCH => NodeData::Branches(Vec::new()),
38 Page::TYPE_LEAF => NodeData::Leaves(Vec::new()),
39 _ => panic!("INVALID PAGE TYPE FOR NEW NODE"),
40 };
41 Node {
42 id,
43 page_id: 0,
44 num_pages: 0,
45 children: Vec::new(),
46 data,
47 deleted: false,
48 original_key: None,
49 pagesize,
50 spilled: false,
51 parent: None,
52 }
53 }
54
55 pub(crate) fn from_page(id: NodeID, p: &Page, pagesize: u64) -> Node<'n> {
58 let data: NodeData = match p.page_type {
59 Page::TYPE_BRANCH => {
60 let mut data = Vec::with_capacity(p.count as usize);
61 for branch in p.branch_elements() {
62 data.push(Branch {
63 key: Bytes::Slice(branch.key()),
64 page: branch.page,
65 });
66 }
67 NodeData::Branches(data)
68 }
69 Page::TYPE_LEAF => {
70 let mut data = Vec::with_capacity(p.count as usize);
71 for leaf in p.leaf_elements() {
72 data.push(Leaf::from_leaf(leaf));
73 }
74 NodeData::Leaves(data)
75 }
76 _ => panic!("INVALID PAGE TYPE FOR FROM_PAGE"),
77 };
78 let original_key = if data.len() > 0 {
79 Some(data.first_key())
80 } else {
81 None
82 };
83 Node {
84 id,
85 page_id: p.id,
86 num_pages: p.overflow + 1,
87 children: Vec::new(),
88 data,
89 deleted: false,
90 original_key,
91 pagesize,
92 spilled: false,
93 parent: None,
94 }
95 }
96
97 pub(crate) fn with_data(id: NodeID, data: NodeData<'n>, pagesize: u64) -> Node<'n> {
101 let original_key = Some(data.first_key());
102 Node {
103 id,
104 page_id: 0,
105 num_pages: 0,
106 children: Vec::new(),
107 data,
108 deleted: false,
109 original_key,
110 pagesize,
111 spilled: false,
112 parent: None,
113 }
114 }
115
116 pub(crate) fn insert_child(&mut self, id: NodeID, key: Bytes) {
117 match &mut self.data {
118 NodeData::Branches(branches) => {
119 debug_assert!(!self.children.contains(&id));
120 debug_assert!(branches
121 .binary_search_by_key(&key.as_ref(), |b| b.key())
122 .is_ok());
123 self.children.push(id);
124 }
125 NodeData::Leaves(_) => panic!("CANNOT INSERT BRANCH INTO A LEAF NODE"),
126 }
127 }
128
129 pub(crate) fn insert_data<'a>(&'a mut self, leaf: Leaf<'n>) {
130 match &mut self.data {
131 NodeData::Branches(_) => panic!("CANNOT INSERT DATA INTO A BRANCH NODE"),
132 NodeData::Leaves(leaves) => {
133 match leaves.binary_search_by_key(&leaf.key(), |l| l.key()) {
134 Ok(i) => leaves[i] = leaf,
135 Err(i) => leaves.insert(i, leaf),
136 };
137 }
138 }
139 }
140
141 pub(crate) fn insert_branch<'a>(
142 &'a mut self,
143 original_key: &Option<Bytes<'n>>,
144 branch: Branch<'n>,
145 ) {
146 let search_key = match original_key {
147 Some(k) => k.as_ref(),
148 None => branch.key(),
149 };
150 match &mut self.data {
151 NodeData::Leaves(_) => panic!("CANNOT INSERT BRANCH INTO A LEAF NODE"),
152 NodeData::Branches(branches) => {
153 match branches.binary_search_by_key(&search_key, |b| b.key()) {
154 Ok(i) => {
155 assert!(original_key.is_some());
156 branches[i] = branch
157 }
158 Err(i) => {
159 assert!(original_key.is_none());
160 branches.insert(i, branch)
161 }
162 };
163 }
164 }
165 }
166
167 pub(crate) fn delete<'a>(&'a mut self, index: usize) -> Leaf<'n> {
168 match &mut self.data {
169 NodeData::Branches(_) => panic!("CANNOT DELETE DATA FROM A BRANCH NODE"),
170 NodeData::Leaves(leaves) => leaves.remove(index),
171 }
172 }
173
174 pub(crate) fn leaf(&self) -> bool {
175 match &self.data {
176 NodeData::Branches(_) => false,
177 NodeData::Leaves(_) => true,
178 }
179 }
180
181 fn size(&self) -> u64 {
182 HEADER_SIZE + self.data.size()
183 }
184
185 pub(crate) fn needs_merging(&self) -> bool {
186 self.data.len() < MIN_KEYS_PER_NODE || self.size() < (self.pagesize / 4)
187 }
188
189 pub(crate) fn spill<'a>(
190 &'a mut self,
191 bucket: &'a mut InnerBucket<'n>,
192 tx_freelist: &'a mut TxFreelist,
193 parent: Option<&'a mut Self>,
194 ) -> Result<Option<PageID>> {
195 let mut root_page_id: Option<PageID> = None;
196 if self.spilled {
197 return Ok(root_page_id);
198 }
199 self.children
201 .sort_by_cached_key(|id| bucket.nodes[*id as usize].borrow().data.first_key());
202
203 let mut i = 0_usize;
205 while i < self.children.len() {
208 let child_id = self.children[i];
209 let child = bucket.nodes[child_id as usize].clone();
210 let mut child = child.borrow_mut();
211 child.spill(bucket, tx_freelist, Some(self))?;
212 i += 1;
213 }
214
215 let new_siblings = self.split(bucket);
216 self.write(tx_freelist)?;
218 if let Some(new_siblings) = &new_siblings {
219 self.write(tx_freelist)?;
222 for s in new_siblings.iter() {
223 let mut s = s.borrow_mut();
224 s.write(tx_freelist)?;
225 }
226 }
227 match parent {
229 Some(parent) => {
230 parent.insert_branch(&self.original_key, Branch::from_node(self));
235 if let Some(new_siblings) = new_siblings {
236 for s in new_siblings.iter() {
238 let s = s.borrow();
239 let branch = Branch::from_node(&s);
241 parent.insert_branch(&None, branch);
242 }
243 }
244 }
245 None => {
246 match new_siblings {
248 Some(new_siblings) => {
251 let mut branches: Vec<Branch> = Vec::with_capacity(new_siblings.len() + 1);
253 branches.push(Branch::from_node(self));
254 for s in new_siblings {
255 let s = s.borrow();
256 branches.push(Branch::from_node(&s));
257 }
258 let new_parent = bucket.new_node(NodeData::Branches(branches));
260 let mut new_parent = new_parent.borrow_mut();
261 match new_parent.spill(bucket, tx_freelist, None)? {
263 Some(page_id) => root_page_id = Some(page_id),
266 None => panic!("New parent did not return a new root_page_id"),
267 };
268 }
269
270 None => {
271 root_page_id = Some(self.page_id);
274 }
275 }
276 }
277 }
278
279 Ok(root_page_id)
280 }
281
282 pub(crate) fn split<'a>(
283 &'a mut self,
284 bucket: &'a mut InnerBucket<'n>,
285 ) -> Option<Vec<Rc<RefCell<Node<'n>>>>> {
286 if self.data.len() <= (MIN_KEYS_PER_NODE * 2) || self.size() < self.pagesize {
287 return None;
288 }
289 let threshold = ((self.pagesize as f32) * FILL_PERCENT) as u64;
290 let mut split_indexes = Vec::<usize>::new();
291 let mut current_size = HEADER_SIZE;
292 let mut count = 0;
293 match &self.data {
294 NodeData::Branches(b) => {
295 let len = b.len();
296 for (i, b) in b[..len - 2].iter().enumerate() {
297 if i > len - 3 {
298 break;
299 }
300 count += 1;
301 let size = BRANCH_SIZE + (b.key_size() as u64);
302 let new_size = current_size + size;
303 if count >= MIN_KEYS_PER_NODE && new_size > threshold {
304 split_indexes.push(i + 1);
305 current_size = HEADER_SIZE + size;
306 count = 0;
307 } else {
308 current_size = new_size;
309 }
310 }
311 }
312 NodeData::Leaves(leaves) => {
313 let len = leaves.len();
314 for (i, l) in leaves[..len - 2].iter().enumerate() {
315 count += 1;
319 let size = LEAF_SIZE + (l.size() as u64);
320 let new_size = current_size + size;
321 if count >= MIN_KEYS_PER_NODE && new_size > threshold {
322 split_indexes.push(i + 1);
323 current_size = HEADER_SIZE + size;
324 count = 0;
325 } else {
326 current_size = new_size;
327 }
328 }
329 }
330 };
331 if split_indexes.is_empty() {
333 return None;
334 }
335
336 #[allow(clippy::needless_collect)]
339 let new_data: Vec<NodeData> = split_indexes
340 .into_iter()
341 .rev()
343 .map(|i| self.data.split_at(i))
345 .collect();
346
347 Some(
349 new_data
350 .into_iter()
351 .rev()
353 .map(|data| bucket.new_node(data))
354 .collect(),
355 )
356 }
357
358 pub(crate) fn write(&mut self, tx_freelist: &mut TxFreelist) -> Result<()> {
360 if self.deleted {
361 return Ok(());
362 }
363 self.spilled = true;
364 let page = self.allocate(tx_freelist)?;
365 page.write_node(self, self.num_pages)
366 }
367
368 fn allocate<'a>(&'a mut self, tx_freelist: &'a mut TxFreelist) -> Result<&'n mut Page> {
370 self.free_page(tx_freelist);
371 let size = self.size();
372 let page = tx_freelist.allocate(size)?;
373 self.page_id = page.id;
374 self.num_pages = page.overflow + 1;
375 Ok(page)
376 }
377
378 pub(crate) fn free_page(&mut self, tx_freelist: &mut TxFreelist) {
380 if self.page_id != 0 {
381 tx_freelist.free(self.page_id, self.num_pages);
382 self.page_id = 0;
383 }
384 }
385}
386
387pub(crate) enum NodeData<'a> {
388 Branches(Vec<Branch<'a>>),
389 Leaves(Vec<Leaf<'a>>),
390}
391
392impl<'a> NodeData<'a> {
393 pub(crate) fn len(&self) -> usize {
394 match self {
395 NodeData::Branches(b) => b.len(),
396 NodeData::Leaves(l) => l.len(),
397 }
398 }
399
400 fn size(&self) -> u64 {
401 match self {
402 NodeData::Branches(b) => b.iter().fold(BRANCH_SIZE * b.len() as u64, |acc, b| {
403 acc + b.key_size() as u64
404 }),
405 NodeData::Leaves(l) => l
406 .iter()
407 .fold(LEAF_SIZE * l.len() as u64, |acc, l| acc + l.size() as u64),
408 }
409 }
410
411 pub(crate) fn first_key<'b>(&'b self) -> Bytes<'a> {
412 debug_assert!(self.len() > 0, "Cannot get key parts of empty data");
413 match self {
414 NodeData::Branches(b) => b[0].key.clone(),
415 NodeData::Leaves(l) => l[0].key_bytes(),
416 }
417 }
418
419 pub(crate) fn merge(&mut self, other_data: &mut Self) {
420 match (self, other_data) {
421 (NodeData::Branches(b1), NodeData::Branches(b2)) => {
422 b1.append(b2);
423 b1.sort_unstable_by_key(|b| b.key.clone());
424 }
425 (NodeData::Leaves(l1), NodeData::Leaves(l2)) => {
426 l1.append(l2);
427 l1.sort_unstable_by_key(|l| l.key_bytes());
428 let mut last = l1[0].key();
429 for l in l1[1..].iter() {
430 if last >= l.key() {
431 println!("HA. GOT 'EM!");
432 }
433 last = l.key();
434 }
435 }
436 _ => panic!("incompatible data types"),
437 }
438 }
439
440 fn split_at<'b>(&'b mut self, index: usize) -> NodeData<'a> {
441 match self {
442 NodeData::Branches(b) => NodeData::Branches(b.split_off(index)),
443 NodeData::Leaves(l) => NodeData::Leaves(l.split_off(index)),
444 }
445 }
446}
447
448pub(crate) struct Branch<'a> {
449 key: Bytes<'a>,
450 pub(crate) page: PageID,
451}
452
453impl<'a> Branch<'a> {
454 pub(crate) fn from_node<'b>(node: &'b Node<'a>) -> Branch<'a> {
455 Branch {
456 key: node.data.first_key(),
457 page: node.page_id,
458 }
459 }
460
461 pub(crate) fn key(&self) -> &[u8] {
462 self.key.as_ref()
463 }
464
465 pub(crate) fn key_size(&self) -> usize {
466 self.key.size()
467 }
468}
469
470#[derive(Clone)]
471pub(crate) enum Leaf<'a> {
472 Bucket(Bytes<'a>, BucketMeta),
473 Kv(Bytes<'a>, Bytes<'a>),
474}
475
476impl<'a> Leaf<'a> {
477 pub(crate) fn from_leaf<'b>(l: &'b LeafElement) -> Leaf<'a> {
478 match l.node_type {
479 Node::TYPE_DATA => Leaf::Kv(Bytes::Slice(l.key()), Bytes::Slice(l.value())),
480 Node::TYPE_BUCKET => Leaf::Bucket(Bytes::Slice(l.key()), l.value().into()),
481 _ => panic!("INVALID NODE TYPE"),
482 }
483 }
484
485 pub(crate) fn node_type(&self) -> NodeType {
486 match self {
487 Self::Bucket(_, _) => Node::TYPE_BUCKET,
488 Self::Kv(_, _) => Node::TYPE_DATA,
489 }
490 }
491
492 pub(crate) fn key_bytes<'b>(&'b self) -> Bytes<'a> {
493 match self {
494 Self::Bucket(name, _) => name.clone(),
495 Self::Kv(k, _) => k.clone(),
496 }
497 }
498
499 pub(crate) fn key(&self) -> &[u8] {
500 match self {
501 Self::Bucket(b, _) => b.as_ref(),
502 Self::Kv(k, _) => k.as_ref(),
503 }
504 }
505
506 pub(crate) fn value(&self) -> &[u8] {
507 match self {
508 Self::Bucket(_, meta) => meta.as_ref(),
509 Self::Kv(_, v) => v.as_ref(),
510 }
511 }
512
513 pub(crate) fn size(&self) -> usize {
514 match self {
515 Self::Bucket(b, _) => b.size() + META_SIZE,
516 Self::Kv(k, v) => k.size() + v.size(),
517 }
518 }
519
520 pub(crate) fn is_kv(&self) -> bool {
521 match self {
522 Self::Bucket(_, _) => false,
523 Self::Kv(_, _) => true,
524 }
525 }
526}
527
528pub(crate) type NodeType = u8;
530
531impl<'n> Node<'n> {
532 pub(crate) const TYPE_DATA: NodeType = 0x00;
533 pub(crate) const TYPE_BUCKET: NodeType = 0x01;
534}
535
536#[cfg(test)]
537mod test {
538 use std::collections::HashMap;
539
540 use super::*;
541 use crate::{
542 testutil::{rand_bytes, RandomFile},
543 OpenOptions,
544 };
545
546 #[test]
547 fn test_split() -> Result<()> {
548 let random_file = RandomFile::new();
549 let db = OpenOptions::new().pagesize(1024).open(&random_file)?;
550 {
552 let tx = db.tx(true)?;
553 let b = tx.create_bucket("a")?;
554 let mut data = HashMap::new();
555 for key in ["a", "b", "c", "d", "e", "f"] {
557 let value = rand_bytes(512);
558 b.put(key, value.clone())?;
559 data.insert(key, value);
560 }
561 {
562 let mut b = b.inner.borrow_mut();
564 assert!(b.nodes.len() == 1);
565
566 let tx_freelist = tx.inner.borrow().freelist.clone();
567 let mut tx_freelist = tx_freelist.borrow_mut();
568 b.spill(&mut tx_freelist)?;
569 assert!(b.nodes.len() == 4);
572 let branch_node = &b.nodes[3];
574 let branch_node = branch_node.borrow();
575 if let NodeData::Branches(branches) = &branch_node.data {
576 assert!(branches.len() == 3);
577
578 assert!(branches[0].key() == b"a");
579 assert!(branches[0].page == 7);
580
581 assert!(branches[1].key() == b"c");
582 assert!(branches[1].page == 10);
583
584 assert!(branches[2].key() == b"e");
585 assert!(branches[2].page == 13);
586 } else {
587 panic!("Node 3 should have been a branch node")
588 }
589 for (n, keys) in b.nodes[0..=2]
591 .iter()
592 .zip([["a", "b"], ["c", "d"], ["e", "f"]])
593 {
594 let n = n.borrow();
595 assert!(n.data.len() == 2);
596 match &n.data {
597 NodeData::Leaves(leaves) => {
598 for (kv, key) in leaves.iter().zip(keys) {
599 assert!(kv.key() == key.as_bytes());
600 assert!(kv.value() == data[key]);
601 }
602 }
603 _ => panic!("Must be a leaf node"),
604 }
605 }
606 }
607 }
608 Ok(())
609 }
610}