oxigdal_gpkg/sqlite_reader.rs
//! Pure Rust SQLite binary format reader.
//!
//! Parses the 100-byte SQLite file header and provides page-level access.
//! Reference: <https://www.sqlite.org/fileformat.html>
5
6use crate::error::GpkgError;
7
/// Text encoding declared in the SQLite file header.
///
/// The enum carries no data, so it is `Copy` and supports full equality
/// (`Eq`) in addition to the original `Debug`, `Clone` and `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextEncoding {
    /// UTF-8 encoding (value 1 in the header).
    Utf8,
    /// UTF-16 little-endian encoding (value 2 in the header).
    Utf16Le,
    /// UTF-16 big-endian encoding (value 3 in the header).
    Utf16Be,
}
18
19/// Parsed 100-byte SQLite file header.
20#[derive(Debug, Clone)]
21pub struct SqliteHeader {
22 /// Actual page size in bytes (raw value 1 maps to 65536).
23 pub page_size: u32,
24 /// Database size in pages (may be 0 for older files; use `SqliteReader::page_count`).
25 pub db_size_pages: u32,
26 /// Page number of the first trunk page of the freelist (0 if no freelist).
27 pub first_freelist_page: u32,
28 /// Total number of free pages in the freelist.
29 pub freelist_page_count: u32,
30 /// Schema cookie (incremented on each schema change).
31 pub schema_version: u32,
32 /// Schema format number (1–4).
33 pub schema_format: u8,
34 /// Suggested default cache size in pages (signed).
35 pub default_cache_size: i32,
36 /// Text encoding used by the database.
37 pub text_encoding: TextEncoding,
38 /// User-defined version number (offset 60).
39 pub user_version: u32,
40 /// Application ID written by `PRAGMA application_id` (offset 68).
41 /// Value `0x47504B47` ("GPKG") identifies a GeoPackage file.
42 pub application_id: u32,
43}
44
45impl SqliteHeader {
46 /// Returns `true` if the application_id marks this as a GeoPackage.
47 pub fn is_geopackage(&self) -> bool {
48 self.application_id == 0x4750_4B47
49 }
50}
51
52/// Minimal SQLite binary file parser providing header access and page slicing.
53pub struct SqliteReader {
54 /// Raw file bytes.
55 data: Vec<u8>,
56 /// Parsed file header.
57 pub header: SqliteHeader,
58}
59
60impl SqliteReader {
61 /// Parse a SQLite file from its raw bytes.
62 ///
63 /// # Errors
64 /// Returns [`GpkgError::InvalidFormat`] when the data is too short or does
65 /// not begin with the SQLite magic string.
66 pub fn from_bytes(data: Vec<u8>) -> Result<Self, GpkgError> {
67 const SQLITE_MAGIC: &[u8] = b"SQLite format 3\x00";
68
69 if data.len() < 100 {
70 return Err(GpkgError::InvalidFormat(
71 "Data too short for SQLite header (need ≥ 100 bytes)".into(),
72 ));
73 }
74 if !data.starts_with(SQLITE_MAGIC) {
75 return Err(GpkgError::InvalidFormat("Not a SQLite file".into()));
76 }
77
78 // Offset 16: page size (2 bytes, big-endian). Value 1 means 65536.
79 let page_size_raw = u16::from_be_bytes([data[16], data[17]]) as u32;
80 let page_size = if page_size_raw == 1 {
81 65536
82 } else {
83 page_size_raw
84 };
85
86 // Offset 56: text encoding (4 bytes, big-endian).
87 let text_encoding = match u32::from_be_bytes([data[56], data[57], data[58], data[59]]) {
88 2 => TextEncoding::Utf16Le,
89 3 => TextEncoding::Utf16Be,
90 _ => TextEncoding::Utf8,
91 };
92
93 let header = SqliteHeader {
94 page_size,
95 // Offset 28: database size in pages.
96 db_size_pages: u32::from_be_bytes([data[28], data[29], data[30], data[31]]),
97 // Offset 32: first trunk freelist page.
98 first_freelist_page: u32::from_be_bytes([data[32], data[33], data[34], data[35]]),
99 // Offset 36: total freelist pages.
100 freelist_page_count: u32::from_be_bytes([data[36], data[37], data[38], data[39]]),
101 // Offset 40: schema cookie.
102 schema_version: u32::from_be_bytes([data[40], data[41], data[42], data[43]]),
103 // Offset 44: schema format number.
104 schema_format: data[44],
105 // Offset 48: default cache size (signed).
106 default_cache_size: i32::from_be_bytes([data[48], data[49], data[50], data[51]]),
107 text_encoding,
108 // Offset 60: user version.
109 user_version: u32::from_be_bytes([data[60], data[61], data[62], data[63]]),
110 // Offset 68: application id (SQLite ≥ 3.8.6).
111 // The header is 100 bytes so offset 68+3=71 is always in range.
112 application_id: u32::from_be_bytes([data[68], data[69], data[70], data[71]]),
113 };
114
115 Ok(Self { data, header })
116 }
117
118 /// Return the byte slice for the given page (1-indexed, as per SQLite spec).
119 ///
120 /// # Errors
121 /// Returns [`GpkgError::InvalidFormat`] if `page_num` is 0 or out of range.
122 pub fn page(&self, page_num: u32) -> Result<&[u8], GpkgError> {
123 if page_num == 0 {
124 return Err(GpkgError::InvalidFormat(
125 "Page numbers are 1-indexed; 0 is invalid".into(),
126 ));
127 }
128 let page_size = self.header.page_size as usize;
129 let offset = (page_num as usize - 1) * page_size;
130 let end = offset + page_size;
131 if end > self.data.len() {
132 return Err(GpkgError::InvalidFormat(format!(
133 "Page {page_num} out of range (file has {} bytes, need {end})",
134 self.data.len()
135 )));
136 }
137 Ok(&self.data[offset..end])
138 }
139
140 /// Return the number of pages, preferring the header value when non-zero.
141 pub fn page_count(&self) -> u32 {
142 if self.header.db_size_pages > 0 {
143 self.header.db_size_pages
144 } else {
145 (self.data.len() / self.header.page_size as usize) as u32
146 }
147 }
148
149 /// Return `true` when the file contains at least one complete page.
150 pub fn is_valid(&self) -> bool {
151 self.data.len() >= self.header.page_size as usize
152 }
153}