Skip to main content

oxigdal_gpkg/
sqlite_reader.rs

1//! Pure Rust SQLite binary format reader.
2//!
3//! Parses the 100-byte SQLite file header and provides page-level access.
4//! Reference: <https://www.sqlite.org/fileformat.html>
5
6use crate::error::GpkgError;
7
/// Text encoding declared in a SQLite database header.
///
/// The variants correspond to the values 1, 2 and 3 of the text-encoding
/// field of the file header. The type is a plain fieldless enum, so it is
/// `Copy` and can be compared, hashed and passed by value freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TextEncoding {
    /// UTF-8 (header value 1).
    Utf8,
    /// UTF-16, little-endian byte order (header value 2).
    Utf16Le,
    /// UTF-16, big-endian byte order (header value 3).
    Utf16Be,
}
18
19/// Parsed 100-byte SQLite file header.
20#[derive(Debug, Clone)]
21pub struct SqliteHeader {
22    /// Actual page size in bytes (raw value 1 maps to 65536).
23    pub page_size: u32,
24    /// Database size in pages (may be 0 for older files; use `SqliteReader::page_count`).
25    pub db_size_pages: u32,
26    /// Page number of the first trunk page of the freelist (0 if no freelist).
27    pub first_freelist_page: u32,
28    /// Total number of free pages in the freelist.
29    pub freelist_page_count: u32,
30    /// Schema cookie (incremented on each schema change).
31    pub schema_version: u32,
32    /// Schema format number (1–4).
33    pub schema_format: u8,
34    /// Suggested default cache size in pages (signed).
35    pub default_cache_size: i32,
36    /// Text encoding used by the database.
37    pub text_encoding: TextEncoding,
38    /// User-defined version number (offset 60).
39    pub user_version: u32,
40    /// Application ID written by `PRAGMA application_id` (offset 68).
41    /// Value `0x47504B47` ("GPKG") identifies a GeoPackage file.
42    pub application_id: u32,
43}
44
45impl SqliteHeader {
46    /// Returns `true` if the application_id marks this as a GeoPackage.
47    pub fn is_geopackage(&self) -> bool {
48        self.application_id == 0x4750_4B47
49    }
50}
51
52/// Minimal SQLite binary file parser providing header access and page slicing.
53pub struct SqliteReader {
54    /// Raw file bytes.
55    data: Vec<u8>,
56    /// Parsed file header.
57    pub header: SqliteHeader,
58}
59
60impl SqliteReader {
61    /// Parse a SQLite file from its raw bytes.
62    ///
63    /// # Errors
64    /// Returns [`GpkgError::InvalidFormat`] when the data is too short or does
65    /// not begin with the SQLite magic string.
66    pub fn from_bytes(data: Vec<u8>) -> Result<Self, GpkgError> {
67        const SQLITE_MAGIC: &[u8] = b"SQLite format 3\x00";
68
69        if data.len() < 100 {
70            return Err(GpkgError::InvalidFormat(
71                "Data too short for SQLite header (need ≥ 100 bytes)".into(),
72            ));
73        }
74        if !data.starts_with(SQLITE_MAGIC) {
75            return Err(GpkgError::InvalidFormat("Not a SQLite file".into()));
76        }
77
78        // Offset 16: page size (2 bytes, big-endian). Value 1 means 65536.
79        let page_size_raw = u16::from_be_bytes([data[16], data[17]]) as u32;
80        let page_size = if page_size_raw == 1 {
81            65536
82        } else {
83            page_size_raw
84        };
85
86        // Offset 56: text encoding (4 bytes, big-endian).
87        let text_encoding = match u32::from_be_bytes([data[56], data[57], data[58], data[59]]) {
88            2 => TextEncoding::Utf16Le,
89            3 => TextEncoding::Utf16Be,
90            _ => TextEncoding::Utf8,
91        };
92
93        let header = SqliteHeader {
94            page_size,
95            // Offset 28: database size in pages.
96            db_size_pages: u32::from_be_bytes([data[28], data[29], data[30], data[31]]),
97            // Offset 32: first trunk freelist page.
98            first_freelist_page: u32::from_be_bytes([data[32], data[33], data[34], data[35]]),
99            // Offset 36: total freelist pages.
100            freelist_page_count: u32::from_be_bytes([data[36], data[37], data[38], data[39]]),
101            // Offset 40: schema cookie.
102            schema_version: u32::from_be_bytes([data[40], data[41], data[42], data[43]]),
103            // Offset 44: schema format number.
104            schema_format: data[44],
105            // Offset 48: default cache size (signed).
106            default_cache_size: i32::from_be_bytes([data[48], data[49], data[50], data[51]]),
107            text_encoding,
108            // Offset 60: user version.
109            user_version: u32::from_be_bytes([data[60], data[61], data[62], data[63]]),
110            // Offset 68: application id (SQLite ≥ 3.8.6).
111            // The header is 100 bytes so offset 68+3=71 is always in range.
112            application_id: u32::from_be_bytes([data[68], data[69], data[70], data[71]]),
113        };
114
115        Ok(Self { data, header })
116    }
117
118    /// Return the byte slice for the given page (1-indexed, as per SQLite spec).
119    ///
120    /// # Errors
121    /// Returns [`GpkgError::InvalidFormat`] if `page_num` is 0 or out of range.
122    pub fn page(&self, page_num: u32) -> Result<&[u8], GpkgError> {
123        if page_num == 0 {
124            return Err(GpkgError::InvalidFormat(
125                "Page numbers are 1-indexed; 0 is invalid".into(),
126            ));
127        }
128        let page_size = self.header.page_size as usize;
129        let offset = (page_num as usize - 1) * page_size;
130        let end = offset + page_size;
131        if end > self.data.len() {
132            return Err(GpkgError::InvalidFormat(format!(
133                "Page {page_num} out of range (file has {} bytes, need {end})",
134                self.data.len()
135            )));
136        }
137        Ok(&self.data[offset..end])
138    }
139
140    /// Return the number of pages, preferring the header value when non-zero.
141    pub fn page_count(&self) -> u32 {
142        if self.header.db_size_pages > 0 {
143            self.header.db_size_pages
144        } else {
145            (self.data.len() / self.header.page_size as usize) as u32
146        }
147    }
148
149    /// Return `true` when the file contains at least one complete page.
150    pub fn is_valid(&self) -> bool {
151        self.data.len() >= self.header.page_size as usize
152    }
153}