scdb/internal/entries/headers/
inverted_index_header.rs

1use crate::internal::entries::headers::shared::{
2    DerivedHeaderProps, Header, DEFAULT_DB_MAX_KEYS, HEADER_SIZE_IN_BYTES,
3};
4use crate::internal::utils;
5use std::fmt::{Display, Formatter};
6use std::io;
7
/// Fallback for `max_index_key_len`: `InvertedIndexHeader::new` uses this value when the
/// caller passes `None` for that argument.
pub(crate) const DEFAULT_MAX_INDEX_KEY_LEN: u32 = 3;
9
/// Header record for the scdb inverted index.
///
/// `title`, `block_size`, `max_keys`, `redundant_blocks` and `max_index_key_len` are the
/// fields written out by `as_bytes` (in that order, big-endian, followed by 66 zero bytes).
/// The other four fields are copied from `DerivedHeaderProps::new(block_size, max_keys,
/// redundant_blocks)` whenever a header is built in this file.
#[derive(Debug, PartialEq, Clone, Eq, Ord, PartialOrd)]
pub(crate) struct InvertedIndexHeader {
    /// Title string; `new` sets it to "ScdbIndex v0.001" and `from_data_array` reads it
    /// from bytes 0..16.
    pub(crate) title: String,
    /// Block size; `new` falls back to `utils::get_vm_page_size()` when none is supplied.
    /// Read from bytes 16..20.
    pub(crate) block_size: u32,
    /// Maximum number of keys; read from bytes 20..28.
    pub(crate) max_keys: u64,
    /// Number of redundant blocks (`new` defaults it to 1); read from bytes 28..30.
    pub(crate) redundant_blocks: u16,
    /// Taken from `DerivedHeaderProps`; not written by `as_bytes`.
    pub(crate) items_per_index_block: u64,
    /// Taken from `DerivedHeaderProps`; not written by `as_bytes`.
    pub(crate) number_of_index_blocks: u64,
    /// Taken from `DerivedHeaderProps`; not written by `as_bytes`.
    pub(crate) values_start_point: u64,
    /// Taken from `DerivedHeaderProps`; not written by `as_bytes`.
    pub(crate) net_block_size: u64,
    /// Maximum index key length (`new` defaults it to `DEFAULT_MAX_INDEX_KEY_LEN`);
    /// read from bytes 30..34.
    pub(crate) max_index_key_len: u32,
}
22
23impl InvertedIndexHeader {
24    /// Creates a new InvertedIndexHeader
25    pub(crate) fn new(
26        max_keys: Option<u64>,
27        redundant_blocks: Option<u16>,
28        block_size: Option<u32>,
29        max_index_key_len: Option<u32>,
30    ) -> Self {
31        let max_index_key_len = max_index_key_len.unwrap_or(DEFAULT_MAX_INDEX_KEY_LEN);
32        let max_keys = max_keys.unwrap_or(DEFAULT_DB_MAX_KEYS * (max_index_key_len as u64));
33        let redundant_blocks = redundant_blocks.unwrap_or(1);
34        let block_size = block_size.unwrap_or_else(utils::get_vm_page_size);
35        let derived_props = DerivedHeaderProps::new(block_size, max_keys, redundant_blocks);
36
37        Self {
38            title: "ScdbIndex v0.001".to_string(),
39            block_size,
40            max_keys,
41            redundant_blocks,
42            items_per_index_block: derived_props.items_per_index_block,
43            number_of_index_blocks: derived_props.number_of_index_blocks,
44            values_start_point: derived_props.values_start_point,
45            net_block_size: derived_props.net_block_size,
46            max_index_key_len,
47        }
48    }
49}
50
51impl Header for InvertedIndexHeader {
52    #[inline(always)]
53    fn get_items_per_index_block(&self) -> u64 {
54        self.items_per_index_block
55    }
56
57    #[inline(always)]
58    fn get_number_of_index_blocks(&self) -> u64 {
59        self.number_of_index_blocks
60    }
61
62    #[inline(always)]
63    fn get_net_block_size(&self) -> u64 {
64        self.net_block_size
65    }
66
67    fn as_bytes(&self) -> Vec<u8> {
68        self.title
69            .as_bytes()
70            .iter()
71            .chain(&self.block_size.to_be_bytes())
72            .chain(&self.max_keys.to_be_bytes())
73            .chain(&self.redundant_blocks.to_be_bytes())
74            .chain(&self.max_index_key_len.to_be_bytes())
75            .chain(&[0u8; 66])
76            .map(|v| v.to_owned())
77            .collect()
78    }
79
80    fn from_data_array(data: &[u8]) -> io::Result<Self> {
81        if data.len() < HEADER_SIZE_IN_BYTES as usize {
82            return Err(io::Error::new(
83                io::ErrorKind::InvalidData,
84                format!(
85                    "data should be at least {} bytes in length",
86                    HEADER_SIZE_IN_BYTES
87                ),
88            ));
89        }
90
91        let title = String::from_utf8(data[0..16].to_owned())
92            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
93        let block_size = u32::from_be_bytes(utils::slice_to_array::<4>(&data[16..20])?);
94        let max_keys = u64::from_be_bytes(utils::slice_to_array::<8>(&data[20..28])?);
95        let redundant_blocks = u16::from_be_bytes(utils::slice_to_array::<2>(&data[28..30])?);
96        let max_index_key_len = u32::from_be_bytes(utils::slice_to_array::<4>(&data[30..34])?);
97        let derived_props = DerivedHeaderProps::new(block_size, max_keys, redundant_blocks);
98
99        let header = Self {
100            title,
101            block_size,
102            max_keys,
103            redundant_blocks,
104            items_per_index_block: derived_props.items_per_index_block,
105            number_of_index_blocks: derived_props.number_of_index_blocks,
106            values_start_point: derived_props.values_start_point,
107            net_block_size: derived_props.net_block_size,
108            max_index_key_len,
109        };
110
111        Ok(header)
112    }
113}
114
115impl Display for InvertedIndexHeader {
116    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
117        write!(f, "InvertedIndexHeader {{ title: {}, block_size: {}, max_keys: {}, redundant_blocks: {}, items_per_index_block: {}, number_of_index_blocks: {}, values_start_point: {}, net_block_size: {}, max_index_key_len: {}}}",
118               self.title,
119               self.block_size,
120               self.max_keys,
121               self.redundant_blocks,
122               self.items_per_index_block,
123               self.number_of_index_blocks,
124               self.values_start_point,
125               self.net_block_size,
126               self.max_index_key_len,
127        )
128    }
129}
130
131#[cfg(test)]
132mod tests {
133    use super::*;
134    use crate::internal::utils::get_vm_page_size;
135    use std::fs::{File, OpenOptions};
136    use std::io;
137
138    use crate::internal::entries::headers::shared::HEADER_SIZE_IN_BYTES;
139    use serial_test::serial;
140    use std::io::{Seek, SeekFrom, Write};
141
142    #[test]
143    #[serial]
144    fn search_file_header_new() {
145        let block_size = get_vm_page_size();
146        type Record = (Option<u64>, Option<u16>, Option<u32>, InvertedIndexHeader);
147        let test_table: Vec<Record> = vec![
148            (
149                None,
150                None,
151                None,
152                generate_header(3_000_000, 1, block_size, 3),
153            ),
154            (
155                Some(24_000_000),
156                None,
157                None,
158                generate_header(24_000_000, 1, block_size, 3),
159            ),
160            (
161                None,
162                Some(9),
163                None,
164                generate_header(3_000_000, 9, block_size, 3),
165            ),
166            (
167                Some(24_000_000),
168                Some(5),
169                None,
170                generate_header(24_000_000, 5, block_size, 3),
171            ),
172            (
173                None,
174                None,
175                Some(9),
176                generate_header(9_000_000, 1, block_size, 9),
177            ),
178            (
179                Some(1_000_000),
180                None,
181                Some(9),
182                generate_header(1_000_000, 1, block_size, 9),
183            ),
184            (
185                Some(24_000_000),
186                Some(6),
187                Some(7),
188                generate_header(24_000_000, 6, block_size, 7),
189            ),
190        ];
191
192        for (max_keys, redundant_blocks, max_index_key_len, expected) in test_table {
193            let got = InvertedIndexHeader::new(max_keys, redundant_blocks, None, max_index_key_len);
194            assert_eq!(&got, &expected);
195        }
196    }
197
198    #[test]
199    #[serial]
200    fn search_file_header_as_bytes_works() {
201        let block_size_bytes = get_vm_page_size().to_be_bytes().to_vec();
202        // title: "ScdbIndex v0.001"
203        let title_bytes = vec![
204            83u8, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49,
205        ];
206        let reserve_bytes = vec![0u8; 66];
207        type Record = (Option<u64>, Option<u16>, Option<u32>, Vec<u8>);
208        let test_table: Vec<Record> = vec![
209            (
210                None,
211                None,
212                None,
213                vec![
214                    title_bytes.clone(),
215                    block_size_bytes.clone(),
216                    /* max_keys 3_000_000u64 */ vec![0, 0, 0, 0, 0, 45, 198, 192],
217                    /* redundant_blocks 1u16 */ vec![0, 1],
218                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
219                    reserve_bytes.clone(),
220                ]
221                .concat(),
222            ),
223            (
224                Some(24_000_000),
225                None,
226                None,
227                vec![
228                    title_bytes.clone(),
229                    block_size_bytes.clone(),
230                    /* max_keys 24_000_000 */ vec![0, 0, 0, 0, 1, 110, 54, 0],
231                    /* redundant_blocks 1u16 */ vec![0, 1],
232                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
233                    reserve_bytes.clone(),
234                ]
235                .concat(),
236            ),
237            (
238                None,
239                Some(9),
240                None,
241                vec![
242                    title_bytes.clone(),
243                    block_size_bytes.clone(),
244                    /* max_keys 3_000_000u64 */ vec![0, 0, 0, 0, 0, 45, 198, 192],
245                    /* redundant_blocks 9u16 */ vec![0, 9],
246                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
247                    reserve_bytes.clone(),
248                ]
249                .concat(),
250            ),
251            (
252                None,
253                None,
254                Some(9),
255                vec![
256                    title_bytes.clone(),
257                    block_size_bytes.clone(),
258                    /* max_keys 9_000_000u64 */ vec![0, 0, 0, 0, 0, 137, 84, 64],
259                    /* redundant_blocks 1u16 */ vec![0, 1],
260                    /* max_index_key_len 9u32 */ vec![0, 0, 0, 9],
261                    reserve_bytes.clone(),
262                ]
263                .concat(),
264            ),
265            (
266                Some(24_000_000),
267                Some(5),
268                Some(9),
269                vec![
270                    title_bytes.clone(),
271                    block_size_bytes.clone(),
272                    /* max_keys 24_000_000u64 */ vec![0, 0, 0, 0, 1, 110, 54, 0],
273                    /* redundant_blocks 5u16 */ vec![0, 5],
274                    /* max_index_key_len 9u32 */ vec![0, 0, 0, 9],
275                    reserve_bytes.clone(),
276                ]
277                .concat(),
278            ),
279        ];
280
281        for (max_keys, redundant_blocks, max_index_key_len, expected) in test_table {
282            let got = InvertedIndexHeader::new(max_keys, redundant_blocks, None, max_index_key_len)
283                .as_bytes();
284            assert_eq!(&got, &expected);
285        }
286    }
287
288    #[test]
289    #[serial]
290    fn search_file_header_from_data_array() {
291        let block_size = get_vm_page_size();
292        let block_size_bytes = block_size.to_be_bytes().to_vec();
293        // title: "ScdbIndex v0.001"
294        let title_bytes = vec![
295            83u8, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49,
296        ];
297        let reserve_bytes = vec![0u8; 66];
298        type Record = (Vec<u8>, InvertedIndexHeader);
299        let test_table: Vec<Record> = vec![
300            (
301                vec![
302                    title_bytes.clone(),
303                    block_size_bytes.clone(),
304                    /* max_keys 1_000_000u64 */ vec![0, 0, 0, 0, 0, 15, 66, 64],
305                    /* redundant_blocks 1u16 */ vec![0, 1],
306                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
307                    reserve_bytes.clone(),
308                ]
309                .concat(),
310                generate_header(1_000_000, 1, block_size, 3),
311            ),
312            (
313                vec![
314                    title_bytes.clone(),
315                    block_size_bytes.clone(),
316                    /* max_keys 24_000_000 */ vec![0, 0, 0, 0, 1, 110, 54, 0],
317                    /* redundant_blocks 1u16 */ vec![0, 1],
318                    /* max_index_key_len 9u32 */ vec![0, 0, 0, 9],
319                    reserve_bytes.clone(),
320                ]
321                .concat(),
322                generate_header(24_000_000, 1, block_size, 9),
323            ),
324            (
325                vec![
326                    title_bytes.clone(),
327                    block_size_bytes.clone(),
328                    /* max_keys 1_000_000u64 */ vec![0, 0, 0, 0, 0, 15, 66, 64],
329                    /* redundant_blocks 9u16 */ vec![0, 9],
330                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
331                    reserve_bytes.clone(),
332                ]
333                .concat(),
334                generate_header(1_000_000, 9, block_size, 3),
335            ),
336            (
337                vec![
338                    title_bytes.clone(),
339                    block_size_bytes.clone(),
340                    /* max_keys 24_000_000u64 */ vec![0, 0, 0, 0, 1, 110, 54, 0],
341                    /* redundant_blocks 5u16 */ vec![0, 5],
342                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
343                    reserve_bytes.clone(),
344                ]
345                .concat(),
346                generate_header(24_000_000, 5, block_size, 3),
347            ),
348        ];
349
350        for (data_array, expected) in test_table {
351            let got = InvertedIndexHeader::from_data_array(&data_array).expect("from_data_array");
352            assert_eq!(&got, &expected);
353        }
354    }
355
356    #[test]
357    #[serial]
358    fn search_file_header_from_data_array_out_of_bounds() {
359        let block_size = get_vm_page_size();
360        let block_size_bytes = block_size.to_be_bytes().to_vec();
361        // title: "ScdbIndex v0.001"
362        let title_bytes = vec![
363            83u8, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49,
364        ];
365        let reserve_bytes = vec![0u8; 66];
366        let test_table: Vec<Vec<u8>> = vec![
367            vec![
368                title_bytes[2..].to_vec(), // title is truncated
369                block_size_bytes.clone(),
370                vec![0, 0, 0, 0, 0, 15, 66, 64],
371                vec![0, 1],
372                vec![0, 0, 0, 3],
373                reserve_bytes.clone(),
374            ]
375            .concat(),
376            vec![
377                title_bytes.clone(),
378                block_size_bytes[..3].to_vec(), // block_size is truncated
379                vec![0, 0, 0, 0, 1, 110, 54, 0],
380                vec![0, 1],
381                vec![0, 0, 0, 3],
382                reserve_bytes.clone(),
383            ]
384            .concat(),
385            vec![
386                title_bytes.clone(),
387                block_size_bytes.clone(),
388                vec![0, 0, 15, 66, 64], // max_keys is truncated
389                vec![0, 9],
390                vec![0, 0, 0, 3],
391                reserve_bytes.clone(),
392            ]
393            .concat(),
394            vec![
395                title_bytes.clone(),
396                block_size_bytes.clone(),
397                vec![0, 0, 0, 0, 1, 110, 54, 0],
398                vec![5], // redundant_blocks is truncated
399                vec![0, 0, 0, 3],
400                reserve_bytes.clone(),
401            ]
402            .concat(),
403            vec![
404                title_bytes.clone(),
405                block_size_bytes.clone(),
406                vec![0, 0, 0, 0, 1, 110, 54, 0],
407                vec![0, 5],
408                vec![0, 0, 3], // max_index_key_len is truncated
409                reserve_bytes.clone(),
410            ]
411            .concat(),
412            vec![
413                title_bytes.clone(),
414                block_size_bytes.clone(),
415                vec![0, 0, 0, 0, 1, 110, 54, 0],
416                vec![0, 5],
417                vec![0, 0, 0, 3],
418                reserve_bytes[..45].to_vec(), // reserve bytes are truncated
419            ]
420            .concat(),
421        ];
422
423        for data_array in test_table {
424            let got = InvertedIndexHeader::from_data_array(&data_array);
425            assert!(got.is_err());
426        }
427    }
428
429    #[test]
430    #[serial]
431    fn search_file_header_from_file() {
432        let file_path = "testdb.scdb";
433        let block_size = get_vm_page_size();
434        let block_size_bytes = block_size.to_be_bytes().to_vec();
435        // title: "ScdbIndex v0.001"
436        let title_bytes = vec![
437            83u8, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49,
438        ];
439        let reserve_bytes = vec![0u8; 66];
440        type Record = (Vec<u8>, InvertedIndexHeader);
441        let test_table: Vec<Record> = vec![
442            (
443                vec![
444                    title_bytes.clone(),
445                    block_size_bytes.clone(),
446                    /* max_keys 1_000_000u64 */ vec![0, 0, 0, 0, 0, 15, 66, 64],
447                    /* redundant_blocks 1u16 */ vec![0, 1],
448                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
449                    reserve_bytes.clone(),
450                ]
451                .concat(),
452                generate_header(1_000_000, 1, block_size, 3),
453            ),
454            (
455                vec![
456                    title_bytes.clone(),
457                    block_size_bytes.clone(),
458                    /* max_keys 24_000_000 */ vec![0, 0, 0, 0, 1, 110, 54, 0],
459                    /* redundant_blocks 1u16 */ vec![0, 1],
460                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
461                    reserve_bytes.clone(),
462                ]
463                .concat(),
464                generate_header(24_000_000, 1, block_size, 3),
465            ),
466            (
467                vec![
468                    title_bytes.clone(),
469                    block_size_bytes.clone(),
470                    /* max_keys 1_000_000u64 */ vec![0, 0, 0, 0, 0, 15, 66, 64],
471                    /* redundant_blocks 9u16 */ vec![0, 9],
472                    /* max_index_key_len 3u32 */ vec![0, 0, 0, 3],
473                    reserve_bytes.clone(),
474                ]
475                .concat(),
476                generate_header(1_000_000, 9, block_size, 3),
477            ),
478            (
479                vec![
480                    title_bytes.clone(),
481                    block_size_bytes.clone(),
482                    /* max_keys 24_000_000u64 */ vec![0, 0, 0, 0, 1, 110, 54, 0],
483                    /* redundant_blocks 5u16 */ vec![0, 5],
484                    /* max_index_key_len 8u32 */ vec![0, 0, 0, 8],
485                    reserve_bytes.clone(),
486                ]
487                .concat(),
488                generate_header(24_000_000, 5, block_size, 8),
489            ),
490        ];
491
492        for (data_array, expected) in test_table {
493            let mut file =
494                generate_file_with_data(file_path, &data_array).expect("generate file with data");
495            let got = InvertedIndexHeader::from_file(&mut file).expect("from_file");
496            assert_eq!(&got, &expected);
497        }
498
499        std::fs::remove_file(&file_path).expect("delete the test db file");
500    }
501
502    #[test]
503    #[serial]
504    fn search_file_header_from_data_file_out_of_bounds() {
505        let file_path = "testdb.scdb";
506        let block_size = get_vm_page_size();
507        let block_size_bytes = block_size.to_be_bytes().to_vec();
508        // title: "ScdbIndex v0.001"
509        let title_bytes = vec![
510            83u8, 99, 100, 98, 73, 110, 100, 101, 120, 32, 118, 48, 46, 48, 48, 49,
511        ];
512        let reserve_bytes = vec![0u8; 66];
513        let test_table: Vec<Vec<u8>> = vec![
514            vec![
515                title_bytes[2..].to_vec(), // title is truncated
516                block_size_bytes.clone(),
517                vec![0, 0, 0, 0, 0, 15, 66, 64],
518                vec![0, 1],
519                vec![0, 0, 0, 3],
520                reserve_bytes.clone(),
521            ]
522            .concat(),
523            vec![
524                title_bytes.clone(),
525                block_size_bytes[..3].to_vec(), // block_size is truncated
526                vec![0, 0, 0, 0, 1, 110, 54, 0],
527                vec![0, 1],
528                vec![0, 0, 0, 3],
529                reserve_bytes.clone(),
530            ]
531            .concat(),
532            vec![
533                title_bytes.clone(),
534                block_size_bytes.clone(),
535                vec![0, 0, 15, 66, 64], // max_keys is truncated
536                vec![0, 9],
537                vec![0, 0, 0, 3],
538                reserve_bytes.clone(),
539            ]
540            .concat(),
541            vec![
542                title_bytes.clone(),
543                block_size_bytes.clone(),
544                vec![0, 0, 0, 0, 1, 110, 54, 0],
545                vec![5], // redundant_blocks is truncated
546                vec![0, 0, 0, 3],
547                reserve_bytes.clone(),
548            ]
549            .concat(),
550            vec![
551                title_bytes.clone(),
552                block_size_bytes.clone(),
553                vec![0, 0, 0, 0, 1, 110, 54, 0],
554                vec![0, 5],
555                vec![0, 0, 3], // max_index_key_len is truncated
556                reserve_bytes.clone(),
557            ]
558            .concat(),
559            vec![
560                title_bytes.clone(),
561                block_size_bytes.clone(),
562                vec![0, 0, 0, 0, 1, 110, 54, 0],
563                vec![0, 5],
564                vec![0, 0, 0, 3],
565                reserve_bytes[..45].to_vec(), // reserve bytes are truncated
566            ]
567            .concat(),
568        ];
569
570        for data_array in test_table {
571            let mut file =
572                generate_file_with_data(file_path, &data_array).expect("generate file with data");
573            let got = InvertedIndexHeader::from_file(&mut file);
574            assert!(got.is_err());
575        }
576
577        std::fs::remove_file(&file_path).expect("delete the test db file");
578    }
579
580    #[test]
581    #[serial]
582    fn search_file_header_get_index_offset() {
583        let db_header = InvertedIndexHeader::new(None, None, None, None);
584        let offset = db_header.get_index_offset(b"foo");
585        let block_1_start = HEADER_SIZE_IN_BYTES;
586        let block_1_end = db_header.net_block_size + block_1_start;
587        assert!(block_1_start <= offset && offset < block_1_end);
588    }
589
590    #[test]
591    #[serial]
592    fn search_file_header_get_index_offset_in_nth_block() {
593        let db_header = InvertedIndexHeader::new(None, None, None, None);
594        let initial_offset = db_header.get_index_offset(b"foo");
595        let num_of_blocks = db_header.number_of_index_blocks;
596        for i in 0..num_of_blocks {
597            let block_start = HEADER_SIZE_IN_BYTES + (i * db_header.net_block_size);
598            let block_end = db_header.net_block_size + block_start;
599            let offset = db_header
600                .get_index_offset_in_nth_block(initial_offset, i)
601                .expect("get_index_offset_in_nth_block");
602            assert!(block_start <= offset && offset < block_end);
603        }
604    }
605
606    #[test]
607    #[serial]
608    fn search_file_header_get_index_offset_in_nth_block_out_of_bounds() {
609        let db_header = InvertedIndexHeader::new(None, None, None, None);
610        let initial_offset = db_header.get_index_offset(b"foo");
611        let num_of_blocks = db_header.number_of_index_blocks;
612
613        for i in num_of_blocks..num_of_blocks + 2 {
614            assert!(db_header
615                .get_index_offset_in_nth_block(initial_offset, i)
616                .is_err());
617        }
618    }
619
620    /// Generates a InvertedIndexHeader basing on the inputs supplied. This is just a helper for tests
621    fn generate_header(
622        max_keys: u64,
623        redundant_blocks: u16,
624        block_size: u32,
625        max_index_key_len: u32,
626    ) -> InvertedIndexHeader {
627        let derived_props = DerivedHeaderProps::new(block_size, max_keys, redundant_blocks);
628
629        InvertedIndexHeader {
630            title: "ScdbIndex v0.001".to_string(),
631            block_size,
632            max_keys,
633            redundant_blocks,
634            items_per_index_block: derived_props.items_per_index_block,
635            number_of_index_blocks: derived_props.number_of_index_blocks,
636            values_start_point: derived_props.values_start_point,
637            net_block_size: derived_props.net_block_size,
638            max_index_key_len,
639        }
640    }
641
642    /// Returns a file that has the given data array written to it.
643    fn generate_file_with_data(file_path: &str, data_array: &[u8]) -> io::Result<File> {
644        let mut file = OpenOptions::new()
645            .write(true)
646            .read(true)
647            .create(true)
648            .open(file_path)?;
649        file.seek(SeekFrom::Start(0))?;
650        file.write_all(data_array)?;
651        Ok(file)
652    }
653}