sqlite_parser_nom/
parser.rs

1use crate::be_i48;
2use be_i48::be_i48;
3use nom::branch::alt;
4use nom::bytes::complete::tag;
5use nom::bytes::complete::take;
6use nom::combinator::{complete, map, map_parser, map_res};
7use nom::multi::{count, many0};
8use nom::number::complete::{be_f64, be_i16, be_i24, be_i32, be_i64, be_i8, be_u16, be_u32, be_u8};
9use nom::sequence::{pair, Tuple};
10use nom::IResult;
11
12use crate::model::*;
13use crate::varint::be_u64_varint;
14
15const HEADER_SIZE: usize = 100;
16
17/// Goes through the whole input page-by-page
18/// NOTE: you should use specific parsers or Reader to parse file lazily
19pub fn database(i: &[u8]) -> IResult<&[u8], Database> {
20    let (i, header) = db_header(i)?;
21
22    let page_size = header.page_size.real_size();
23
24    let root_page = map_parser(take(page_size - HEADER_SIZE), page_generic(HEADER_SIZE));
25    let pages = complete(many0(map_parser(take(page_size), page_generic(0))));
26
27    let (i, (root_page, mut pages)) = complete(pair(root_page, pages))(i)?;
28
29    pages.insert(0, root_page);
30
31    Ok((i, Database { header, pages }))
32}
33
34/// File header parser. Page size and text encoding are required for the rest to work correctly.
35pub fn db_header(i: &[u8]) -> IResult<&[u8], DbHeader> {
36    let (i, _) = tag("SQLite format 3\0")(i)?;
37    let (i, page_size) = map(be_u16, PageSize)(i)?;
38    let (i, (write_version, read_version)) = (be_u8, be_u8).parse(i)?;
39    let (i, _reserved) = be_u8(i)?;
40    let (i, (max_payload_fraction, min_payload_fraction, leaf_payload_fraction)) =
41        (be_u8, be_u8, be_u8).parse(i)?;
42    let (i, file_change_counter) = be_u32(i)?;
43    let (i, db_size) = be_u32(i)?;
44    let (i, (first_freelist_page_no, total_freelist_pages)) = (be_u32, be_u32).parse(i)?;
45    let (i, (schema_cookie, schema_format_no)) = (be_u32, be_u32).parse(i)?;
46    let (i, default_page_cache_size) = be_u32(i)?;
47    let (i, no_largest_root_b_tree) = be_u32(i)?;
48    let (i, db_text_encoding) = map_res(be_u32, |x| x.try_into())(i)?;
49    let (i, user_version) = be_u32(i)?;
50    let (i, incremental_vacuum_mode) = be_u32(i)?;
51    let (i, application_id) = be_u32(i)?;
52    let (i, _reserved) = count(be_u8, 20)(i)?;
53    let (i, (version_valid_for_no, sqlite_version_number)) = (be_u32, be_u32).parse(i)?;
54
55    Ok((
56        i,
57        DbHeader {
58            page_size,
59            write_version,
60            read_version,
61            max_payload_fraction,
62            min_payload_fraction,
63            leaf_payload_fraction,
64            file_change_counter,
65            db_size,
66            first_freelist_page_no,
67            total_freelist_pages,
68            schema_cookie,
69            schema_format_no,
70            default_page_cache_size,
71            no_largest_root_b_tree,
72            db_text_encoding,
73            user_version,
74            incremental_vacuum_mode,
75            application_id,
76            version_valid_for_no,
77            sqlite_version_number,
78        },
79    ))
80}
81
82/// The page number 0, which comes right after the header. Input assumed to contain the header.
83pub fn root_page(i: &[u8]) -> IResult<&[u8], Page> {
84    let shrunk_page = &i[HEADER_SIZE..];
85    page_generic(HEADER_SIZE)(shrunk_page)
86}
87
88/// All the rest of pages, pageno >0.
89pub fn page(i: &[u8]) -> IResult<&[u8], Page> {
90    page_generic(0)(i)
91}
92
93// todo: fix const generic thing, hack to pass through parameters
94fn page_generic(page_start_offset: usize) -> impl FnMut(&[u8]) -> IResult<&[u8], Page> {
95    move |i| {
96        alt((
97            map(
98                interior_index_b_tree_page(page_start_offset),
99                Page::InteriorIndex,
100            ),
101            map(leaf_index_b_tree_page(page_start_offset), Page::LeafIndex),
102            map(
103                interior_table_b_tree_page(page_start_offset),
104                Page::InteriorTable,
105            ),
106            map(leaf_table_b_tree_page(page_start_offset), Page::LeafTable),
107        ))(i)
108    }
109}
110
111fn interior_page_header(i: &[u8]) -> IResult<&[u8], InteriorPageHeader> {
112    let (i, first_freeblock_offset) = map(be_u16, |u| Some(u).filter(|&p| p != 0x0u16))(i)?;
113    let (i, no_cells) = be_u16(i)?;
114    let (i, cell_content_offset) = map(be_u16, CellOffset)(i)?;
115    let (i, no_fragmented_bytes) = be_u8(i)?;
116    let (i, rightmost_pointer) = be_u32(i)?;
117
118    Ok((
119        i,
120        InteriorPageHeader {
121            first_freeblock_offset,
122            no_cells,
123            cell_content_offset,
124            no_fragmented_bytes,
125            rightmost_pointer,
126        },
127    ))
128}
129
130fn leaf_page_header(i: &[u8]) -> IResult<&[u8], LeafPageHeader> {
131    let (i, first_freeblock_offset) = map(be_u16, |u| Some(u).filter(|&p| p != 0x0u16))(i)?;
132    let (i, no_cells) = be_u16(i)?;
133    let (i, cell_content_offset) = map(be_u16, CellOffset)(i)?;
134    let (i, no_fragmented_bytes) = be_u8(i)?;
135
136    Ok((
137        i,
138        LeafPageHeader {
139            first_freeblock_offset,
140            no_cells,
141            cell_content_offset,
142            no_fragmented_bytes,
143        },
144    ))
145}
146
147fn interior_index_b_tree_page(
148    page_start_offset: usize,
149) -> impl FnMut(&[u8]) -> IResult<&[u8], InteriorIndexPage> {
150    move |i| {
151        let (ii, _) = tag([0x02u8])(i)?;
152        let (ii, header) = interior_page_header(ii)?;
153        let (ii, cell_pointers) = count(be_u16, header.no_cells.into())(ii)?;
154
155        let mut cells = Vec::with_capacity(cell_pointers.len());
156        for &ptr in cell_pointers.iter() {
157            let cell_offset = ptr as usize - page_start_offset;
158            let (_, cell) = interior_index_cell(&i[cell_offset..])?;
159            cells.push(cell);
160        }
161
162        Ok((
163            ii,
164            InteriorIndexPage {
165                header,
166                cell_pointers,
167                cells,
168            },
169        ))
170    }
171}
172
173/// Expects to get exactly as many bytes in input as it will consume
174fn column_types(i: &[u8]) -> IResult<&[u8], Vec<SerialType>> {
175    // many0 as header might actually be empty
176    complete(many0(map(be_u64_varint, SerialType::from)))(i)
177}
178
179fn text_payload(size: usize) -> impl FnMut(&[u8]) -> IResult<&[u8], Option<Payload>> {
180    move |i| map(take(size), |x: &[u8]| Some(Payload::Text(RawText::new(x))))(i)
181}
182
183fn blob_payload(size: usize) -> impl FnMut(&[u8]) -> IResult<&[u8], Option<Payload>> {
184    move |i| map(take(size), |x: &[u8]| Some(Payload::Blob(x)))(i)
185}
186
187fn column_values<'a, 'b>(
188    serial_types: &'b [SerialType],
189) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<Option<Payload>>> + 'b {
190    move |i| {
191        let mut i: &[u8] = i;
192        let mut res = Vec::with_capacity(serial_types.len());
193        for serial_type in serial_types {
194            let (ii, v) = match serial_type {
195                SerialType::Null => Ok((i, None)),
196                SerialType::I8 => map(be_i8, |x| Some(Payload::I8(x)))(i),
197                SerialType::I16 => map(be_i16, |x| Some(Payload::I16(x)))(i),
198                SerialType::I24 => map(be_i24, |x| Some(Payload::I32(x)))(i),
199                SerialType::I32 => map(be_i32, |x| Some(Payload::I32(x)))(i),
200                SerialType::I48 => map(be_i48, |x| Some(Payload::I64(x)))(i),
201                SerialType::I64 => map(be_i64, |x| Some(Payload::I64(x)))(i),
202                SerialType::F64 => map(be_f64, |x| Some(Payload::F64(x)))(i),
203                SerialType::Const0 => Ok((i, Some(Payload::I8(0)))),
204                SerialType::Const1 => Ok((i, Some(Payload::I8(0)))),
205                SerialType::Reserved => unimplemented!("reserved"),
206                SerialType::Blob(_) if serial_type.size() == 0 => Ok((i, None)),
207                SerialType::Blob(_) => blob_payload(serial_type.size())(i),
208                SerialType::Text(_) if serial_type.size() == 0 => Ok((i, None)),
209                SerialType::Text(_) => text_payload(serial_type.size())(i),
210            }?;
211            i = ii;
212            dbg!(v.clone());
213            res.push(v);
214        }
215
216        Ok((i, res))
217    }
218}
219
220fn index_cell_payload(i: &[u8]) -> IResult<&[u8], IndexCellPayload> {
221    let (i, header_size) = be_u64_varint(i)?;
222    let (_, column_types) = column_types(&i[0..header_size as usize - 1])?;
223    let (i, column_values) = column_values(&column_types)(&i[header_size as usize - 1..])?;
224    let (i, rowid) = be_u64_varint(i)?;
225
226    Ok((
227        i,
228        IndexCellPayload {
229            header_size,
230            column_types,
231            column_values,
232            rowid,
233        },
234    ))
235}
236
237fn interior_index_cell(i: &[u8]) -> IResult<&[u8], InteriorIndexCell> {
238    let (i, left_child_page_no) = be_u32(i)?;
239    let (i, payload_size) = be_u64_varint(i)?;
240    let (i, payload) = index_cell_payload(i)?;
241
242    Ok((
243        i,
244        InteriorIndexCell {
245            left_child_page_no,
246            payload_size,
247            payload,
248            overflow_page_no: None,
249        },
250    ))
251}
252
253fn interior_table_b_tree_page(
254    page_start_offset: usize,
255) -> impl FnMut(&[u8]) -> IResult<&[u8], InteriorTablePage> {
256    move |i| {
257        let (ii, _) = tag([0x05u8])(i)?;
258        let (ii, header) = interior_page_header(ii)?;
259        let (ii, cell_pointers) = count(be_u16, header.no_cells.into())(ii)?;
260
261        let mut cells = Vec::with_capacity(cell_pointers.len());
262        for &ptr in cell_pointers.iter() {
263            let cell_offset = ptr as usize - page_start_offset;
264            let (_, cell) = interior_table_cell(&i[cell_offset..])?;
265            cells.push(cell);
266        }
267
268        Ok((
269            ii,
270            InteriorTablePage {
271                header,
272                cell_pointers,
273                cells,
274            },
275        ))
276    }
277}
278
279fn interior_table_cell(i: &[u8]) -> IResult<&[u8], InteriorTableCell> {
280    let (i, left_child_page_no) = be_u32(i)?;
281    let (i, integer_key) = be_u64_varint(i)?;
282
283    Ok((
284        i,
285        InteriorTableCell {
286            left_child_page_no,
287            integer_key,
288        },
289    ))
290}
291
292fn leaf_index_b_tree_page(
293    page_start_offset: usize,
294) -> impl FnMut(&[u8]) -> IResult<&[u8], LeafIndexPage> {
295    move |i| {
296        let (ii, _) = tag([0x0au8])(i)?;
297        let (ii, header) = leaf_page_header(ii)?;
298        let (ii, cell_pointers) = count(be_u16, header.no_cells.into())(ii)?;
299
300        let mut cells = Vec::with_capacity(cell_pointers.len());
301        for &ptr in cell_pointers.iter() {
302            let cell_offset = ptr as usize - page_start_offset;
303            let (_, cell) = leaf_index_cell(&i[cell_offset..])?;
304            cells.push(cell);
305        }
306
307        Ok((
308            ii,
309            LeafIndexPage {
310                header,
311                cell_pointers,
312                cells,
313            },
314        ))
315    }
316}
317
318fn leaf_index_cell(i: &[u8]) -> IResult<&[u8], LeafIndexCell> {
319    let (i, payload_size) = be_u64_varint(i)?;
320    let (i, payload) = index_cell_payload(i)?;
321
322    Ok((
323        i,
324        LeafIndexCell {
325            payload_size,
326            payload,
327            overflow_page_no: None,
328        },
329    ))
330}
331
332fn leaf_table_b_tree_page(
333    page_start_offset: usize,
334) -> impl FnMut(&[u8]) -> IResult<&[u8], LeafTablePage> {
335    move |i| {
336        let (ii, _) = tag([0x0du8])(i)?;
337        let (ii, header) = leaf_page_header(ii)?;
338        let (ii, cell_pointers) = count(be_u16, header.no_cells.into())(ii)?;
339
340        let mut cells = Vec::with_capacity(cell_pointers.len());
341        for &ptr in cell_pointers.iter() {
342            let cell_offset = ptr as usize - page_start_offset;
343            let (_, cell) = leaf_table_cell(&i[cell_offset..])?;
344            cells.push(cell);
345        }
346
347        Ok((
348            ii,
349            LeafTablePage {
350                header,
351                cell_pointers,
352                cells,
353            },
354        ))
355    }
356}
357
358fn table_cell_payload(i: &[u8]) -> IResult<&[u8], TableCellPayload> {
359    let (i, header_size) = be_u64_varint(i)?;
360    let (_, column_types) = column_types(&i[0..header_size as usize - 1])?;
361    let (i, column_values) = column_values(&column_types)(&i[header_size as usize - 1..])?;
362
363    Ok((
364        i,
365        TableCellPayload {
366            header_size,
367            column_types,
368            column_values,
369        },
370    ))
371}
372
373fn leaf_table_cell(i: &[u8]) -> IResult<&[u8], LeafTableCell> {
374    let (i, payload_size) = be_u64_varint(i)?;
375    let (i, rowid) = be_u64_varint(i)?;
376    let (i, payload) = table_cell_payload(i)?;
377
378    Ok((
379        i,
380        LeafTableCell {
381            payload_size,
382            rowid,
383            payload,
384            overflow_page_no: None,
385        },
386    ))
387}