Skip to main content

rusty_chunkenc/
index.rs

1use std::num::NonZeroUsize;
2
3use nom::{branch::alt, bytes::complete::tag, IResult, Parser};
4
5use crate::{
6    errors::RustyChunkEncError,
7    series::{read_series, Serie},
8    symbol_table::read_symbol_table,
9    toc::read_toc_at_end,
10};
11
12#[derive(Debug)]
13pub struct IndexDiskFormat {
14    series: Vec<Serie>,
15}
16
17impl IndexDiskFormat {
18    pub fn new(series: Vec<Serie>) -> Self {
19        Self { series }
20    }
21
22    pub fn series(&self) -> &Vec<Serie> {
23        &self.series
24    }
25}
26
/// Number of bytes consumed before the section parser runs: the 4-byte magic
/// number plus the 1-byte version tag.
///
/// Section offsets taken from the TOC are reduced by this amount before they
/// are used to slice the post-header input (presumably because the TOC stores
/// offsets measured from the start of the file; confirm against the TOC
/// parser).
const HEADER_LENGTH: usize = 5;
28
29pub fn read_index_disk_format(input: &[u8]) -> IResult<&[u8], IndexDiskFormat> {
30    let (remaining_input, (_, index_disk_format)) = (
31        // Index on disk start with 0xBA AA D7 00
32        tag(&[0xBA, 0xAA, 0xD7, 0x00][..]),
33        alt((read_version_one, read_version_two)),
34    )
35        .parse(input)?;
36
37    Ok((remaining_input, index_disk_format))
38}
39
40// Looks like version 1 and version 2 are the same for what we want to parse.
41fn read_simple_sections(input: &[u8]) -> IResult<&[u8], IndexDiskFormat> {
42    let (remaining_input, toc) = read_toc_at_end(input)?;
43
44    let symbols = if let Some(symbols) = toc.symbols {
45        if let Some((_, symbols_input)) = input.split_at_checked(symbols - HEADER_LENGTH) {
46            let (_, tmp_symbols) = read_symbol_table(symbols_input)?;
47            tmp_symbols
48        } else {
49            return Err(nom::Err::Incomplete(nom::Needed::Size(
50                NonZeroUsize::new(symbols - HEADER_LENGTH - input.len()).unwrap(),
51            )));
52        }
53    } else {
54        Vec::new()
55    };
56
57    let series = if let Some(series_start) = toc.series {
58        if let Some((_, series_input)) = input.split_at_checked(series_start - HEADER_LENGTH) {
59            // Try to find the end of the series data section
60            let series_end = toc.label_indices_start.unwrap_or_else(|| {
61                toc.label_offset_table.unwrap_or_else(|| {
62                    toc.postings_start
63                        .unwrap_or_else(|| toc.postings_offset_table.unwrap_or(0))
64                })
65            });
66
67            let (_, tmp_series) = read_series(series_start, series_end)(series_input)?;
68            tmp_series
69        } else {
70            return Err(nom::Err::Incomplete(nom::Needed::Size(
71                NonZeroUsize::new(series_start - HEADER_LENGTH - input.len()).unwrap(),
72            )));
73        }
74    } else {
75        Vec::new()
76    };
77
78    //println!("toc: {:?}", toc);
79    //println!("symbols: {:?}", symbols);
80    //println!("series: {:?}", series);
81
82    // Apply the symbol table to the series
83    let series_finalised: Vec<Serie> = series
84        .into_iter()
85        .map(|s| s.finalise(&symbols))
86        .collect::<Result<Vec<Serie>, RustyChunkEncError>>()
87        .map_err(|_| {
88            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify))
89        })?;
90
91    Ok((remaining_input, IndexDiskFormat::new(series_finalised)))
92}
93
94pub fn read_version_one(input: &[u8]) -> IResult<&[u8], IndexDiskFormat> {
95    let (remaining_input, (_, index_disk_format)) =
96        (tag(&[1u8][..]), read_simple_sections).parse(input)?;
97
98    Ok((remaining_input, index_disk_format))
99}
100
101pub fn read_version_two(input: &[u8]) -> IResult<&[u8], IndexDiskFormat> {
102    let (remaining_input, (_, index_disk_format)) =
103        (tag(&[2u8][..]), read_simple_sections).parse(input)?;
104
105    Ok((remaining_input, index_disk_format))
106}