1use crate::util;
4use crate::IResult;
5use crate::ParserError;
6use nom::bytes::complete::take;
7use sqlite_types::TextEncoding;
8
9type BoxError = Box<dyn std::error::Error>;
10
11#[derive(Debug)]
12pub enum Record {
13 Null,
14 Int8(i8),
15 Int16(i16),
16 Blob(Vec<u8>),
17 Text(String),
18}
19
20impl Record {
21 pub fn as_string(&self) -> String {
22 match self {
23 Self::Text(v) => v.clone(),
24 Self::Null => "NULL".to_owned(),
25 v => unreachable!("expected string, given {:?}", v),
26 }
27 }
28
29 pub fn as_int(&self) -> usize {
30 match self {
31 Self::Int8(v) => *v as usize,
32 Self::Int16(v) => *v as usize,
33 _ => unreachable!(),
34 }
35 }
36}
37
38#[derive(Clone)]
39pub struct InputContext<'a> {
40 pub(crate) input: &'a [u8],
41 pub(crate) original_input: Vec<u8>,
42}
43
44#[derive(Debug)]
45pub enum Cell {
46 TableBTreeLeafCell(TableBTreeLeafCell),
48 TableBTreeInteriorCell(TableBTreeInteriorCell),
49 IndexBTreeLeafCell(IndexBTreeLeafCell),
50 IndexBTreeInteriorCell(IndexBTreeInteriorCell),
51}
52
53#[derive(Debug)]
54pub struct TableBTreeLeafCell {
55 rowid: u64,
57 pub records: Vec<Record>,
58 page_first_overflow: Option<u32>,
59}
60
61#[derive(Debug)]
62pub struct TableBTreeInteriorCell {
63 pub left_child_page: u32,
64 pub rowid: u64,
65}
66
67#[derive(Debug)]
68pub struct IndexBTreeLeafCell {}
69
70#[derive(Debug)]
71pub struct IndexBTreeInteriorCell {}
72
73#[derive(Debug)]
74pub struct BtreeHeader {
75 page_type: PageType,
76 start_first_freeblock: u16,
77 cell_count: u16,
78 start_cell_content_area: u16,
79 fragmented_free_bytes_count: u8,
80 right_most_pointer: Option<u32>,
81}
82
83#[derive(Debug)]
84pub struct Btree {
85 pub header: BtreeHeader,
86 pub cells: Vec<Cell>,
87}
88
89#[derive(Debug)]
90enum PageContent {
91 Index,
92 Table,
93}
94
95#[derive(Debug)]
96enum PageType {
97 Leaf(PageContent),
98 Interior(PageContent),
99}
100impl PageType {
101 fn header_size(&self) -> u8 {
102 use PageType::*;
103 match self {
104 Leaf(_) => 8,
105 Interior(_) => 12,
106 }
107 }
108
109 fn is_interior(&self) -> bool {
110 matches!(self, PageType::Interior(_))
111 }
112}
113
114fn decode_page_type<'a>(input: InputContext<'a>) -> IResult<InputContext<'a>, PageType> {
115 let (input, byte) = input.read_u8()?;
116 let t = match byte {
117 0x02 => PageType::Interior(PageContent::Index),
118 0x05 => PageType::Interior(PageContent::Table),
119 0x0a => PageType::Leaf(PageContent::Index),
120 0x0d => PageType::Leaf(PageContent::Table),
121 e => {
122 return Err(nom::Err::Failure(ParserError(format!(
123 "unsupported page type: {}",
124 e
125 ))))
126 }
127 };
128 Ok((input, t))
129}
130
131fn decode_header<'a>(input: InputContext<'a>) -> IResult<InputContext<'a>, BtreeHeader> {
132 let (input, page_type) = decode_page_type(input)?;
133 let (input, start_first_freeblock) = input.read_u16()?;
134 let (input, cell_count) = input.read_u16()?;
135 let (input, start_cell_content_area) = input.read_u16()?;
136 let (input, fragmented_free_bytes_count) = input.read_u8()?;
137
138 let (input, right_most_pointer) = if page_type.is_interior() {
139 let (input, right_most_pointer) = input.read_u32()?;
140 (input, Some(right_most_pointer))
141 } else {
142 (input, None)
143 };
144
145 let header = BtreeHeader {
146 page_type,
147 start_first_freeblock,
148 cell_count,
149 start_cell_content_area,
150 fragmented_free_bytes_count,
151 right_most_pointer,
152 };
153 Ok((input, header))
154}
155
156fn decode_cell_pointers<'a>(
157 header: &BtreeHeader,
158 mut input: InputContext<'a>,
159) -> IResult<InputContext<'a>, Vec<u16>> {
160 let mut cell_pointers = Vec::with_capacity(header.cell_count as usize);
161
162 for _ in 0..header.cell_count {
163 let res = input.read_u16()?;
164 input = res.0;
165 cell_pointers.push(res.1);
166 }
167
168 Ok((input, cell_pointers))
169}
170
171fn decode_cell<'a>(
172 enc: &TextEncoding,
173 parent: &PageType,
174 input: InputContext<'a>,
175) -> IResult<InputContext<'a>, Cell> {
176 let (input, cell) = match parent {
177 PageType::Leaf(PageContent::Table) => {
178 let (input, cell) = decode_table_leaf_cell(enc, input)?;
179 (input, Cell::TableBTreeLeafCell(cell))
180 }
181 PageType::Interior(PageContent::Table) => {
182 let (input, cell) = decode_table_interior_cell(input)?;
183 (input, Cell::TableBTreeInteriorCell(cell))
184 }
185 e => {
186 return Err(nom::Err::Failure(ParserError(format!(
187 "unsupported cell with parent: {:?}",
188 e
189 ))))
190 }
191 };
192
193 Ok((input, cell))
194}
195
196fn decode_table_interior_cell<'a>(
197 input: InputContext<'a>,
198) -> IResult<InputContext<'a>, TableBTreeInteriorCell> {
199 let (input, left_child_page) = input.read_u32()?;
200 let (input, rowid) = input.read_varint()?;
201
202 Ok((
203 input,
204 TableBTreeInteriorCell {
205 left_child_page,
206 rowid,
207 },
208 ))
209}
210
211fn decode_table_leaf_cell<'a>(
212 enc: &TextEncoding,
213 input: InputContext<'a>,
214) -> IResult<InputContext<'a>, TableBTreeLeafCell> {
215 let (input, total_payload_size) = input.read_varint()?;
216 let (input, rowid) = input.read_varint()?;
217 let (input, raw_payload) = input.read_bytes(total_payload_size as usize)?;
220 let (_input, records) = decode_records(enc, raw_payload)?;
221
222 let page_first_overflow = None;
226
227 Ok((
228 input,
229 TableBTreeLeafCell {
230 rowid,
231 records,
232 page_first_overflow,
233 },
234 ))
235}
236
237fn decode_records<'a>(enc: &TextEncoding, input: &'a [u8]) -> IResult<&'a [u8], Vec<Record>> {
238 let (input, (header_size, took)) = read_varint(input)?;
239
240 let header_input = &input[..header_size as usize - took];
242 let (_input, columns) = decode_record_columns(header_input)?;
243
244 let mut input = &input[header_size as usize - took..];
245
246 let records = {
247 let mut values = Vec::with_capacity(columns.len());
248
249 for serial_type in columns {
250 let res = decode_record_value(enc, serial_type, input)?;
251 input = res.0;
252 values.push(res.1);
253 }
254
255 values
256 };
257
258 Ok((input, records))
259}
260
261fn decode_record_columns<'a>(mut input: &'a [u8]) -> IResult<&'a [u8], Vec<u64>> {
262 let mut columns = Vec::new();
263 loop {
264 let res = read_varint(input)?;
265 input = res.0;
266 columns.push(res.1 .0);
267
268 if input.is_empty() {
269 break;
270 }
271 }
272
273 Ok((input, columns.to_owned()))
274}
275
276fn decode_record_value<'a>(
277 enc: &TextEncoding,
278 serial_type: u64,
279 input: &'a [u8],
280) -> IResult<&'a [u8], Record> {
281 use Record::*;
282 let (input, serial_type) = match serial_type {
283 0 => (input, Null),
284 1 => {
285 let (input, value) = take(1usize)(input)?;
286 (input, Int8(i8::from_be_bytes(value.try_into().unwrap())))
287 }
288 2 => {
289 let (input, value) = take(2usize)(input)?;
290 (input, Int16(i16::from_be_bytes(value.try_into().unwrap())))
291 }
292 v if v > 12 && v % 2 == 0 => {
293 let size = (v - 12) / 2;
294 let (input, bytes) = take(size)(input)?;
295
296 (input, Blob(bytes.to_owned()))
297 }
298 v if v > 13 && v % 2 != 0 => {
299 let size = (v - 13) / 2;
300
301 let (input, bytes) = take(size)(input)?;
302
303 use TextEncoding::*;
304 let bytes = bytes.to_vec();
305 let value = match enc {
306 UTF8 => String::from_utf8(bytes).unwrap(),
307 UTF16le | UTF16be => unimplemented!(),
308 };
309
310 (input, Text(value))
311 }
312 e => {
313 return Err(nom::Err::Failure(ParserError(format!(
314 "unsupported serial type: {}",
315 e
316 ))))
317 }
318 };
319
320 Ok((input, serial_type))
321}
322
323pub fn decode_first_page<'a>(enc: &'a TextEncoding, page: &'a [u8]) -> Result<Btree, BoxError> {
325 let input = &page[100..];
328 let input = InputContext {
329 input,
330 original_input: page.to_owned(),
331 };
332 match decode_btree(enc, input) {
333 Ok((_, btree)) => Ok(btree),
334 Err(err) => Err(format!("failed to decode: {}", err).into()),
335 }
336}
337
338pub fn decode<'a>(enc: &'a TextEncoding, input: &'a [u8]) -> Result<Btree, BoxError> {
340 let input = InputContext {
341 input,
342 original_input: input.to_owned(),
343 };
344 match decode_btree(enc, input) {
345 Ok((_, btree)) => Ok(btree),
346 Err(err) => Err(format!("failed to decode: {}", err).into()),
347 }
348}
349
350fn decode_btree<'a>(
351 enc: &TextEncoding,
352 input: InputContext<'a>,
353) -> IResult<InputContext<'a>, Btree> {
354 let (input, header) = decode_header(input)?;
355 let (input, cell_pointers) = decode_cell_pointers(&header, input)?;
356 let (input, cells) = {
357 let mut cells = Vec::with_capacity(cell_pointers.len());
358
359 let prev_input = input.clone();
360
361 for cell_pointer in cell_pointers {
362 let input = input.seek_at(cell_pointer as usize);
363
364 let res = decode_cell(enc, &header.page_type, input)?;
365 cells.push(res.1);
366 }
367
368 (prev_input, cells)
369 };
370
371 let btree = Btree { header, cells };
374 Ok((input, btree))
375}
376
377impl<'a> InputContext<'a> {
378 fn seek_at(&'a self, offset: usize) -> InputContext<'a> {
379 let input = &self.original_input[offset..];
380 Self {
381 input,
382 original_input: self.original_input.clone(),
383 }
384 }
385
386 fn read_u32(self) -> IResult<InputContext<'a>, u32> {
387 let (input, v) = util::read_u32(&self.input)?;
388 Ok((
389 Self {
390 input,
391 original_input: self.original_input,
392 },
393 v,
394 ))
395 }
396
397 fn read_u16(self) -> IResult<InputContext<'a>, u16> {
398 let (input, v) = util::read_u16(&self.input)?;
399 Ok((
400 Self {
401 input,
402 original_input: self.original_input,
403 },
404 v,
405 ))
406 }
407
408 fn read_u8(self) -> IResult<InputContext<'a>, u8> {
409 let (input, v) = util::read_u8(&self.input)?;
410 Ok((
411 Self {
412 input,
413 original_input: self.original_input,
414 },
415 v,
416 ))
417 }
418
419 fn read_varint(self) -> IResult<InputContext<'a>, u64> {
420 let (input, (v, _)) = read_varint(&self.input)?;
421
422 Ok((
423 Self {
424 input,
425 original_input: self.original_input,
426 },
427 v,
428 ))
429 }
430
431 fn read_bytes(self, n: usize) -> IResult<InputContext<'a>, &'a [u8]> {
432 let (input, bytes) = take(n)(self.input)?;
433
434 Ok((
435 Self {
436 input,
437 original_input: self.original_input,
438 },
439 bytes,
440 ))
441 }
442}
443
444fn read_varint<'a>(input: &'a [u8]) -> IResult<&'a [u8], (u64, usize)> {
446 let mut v = 0u64;
447 let mut i = 0usize;
448
449 loop {
450 if i >= 8 {
451 break;
452 }
453
454 v = (v << 7) + (input[i] & 0x7f) as u64;
455 if (input[i] & 0x80) == 0 {
456 return Ok((&input[i + 1..], (v, i + 1)));
457 }
458
459 i += 1;
460 }
461
462 v = (v << 8) + (input[i] & 0xff) as u64;
463
464 let input = &input[9..];
465 Ok((input, (v, 9)))
466}