Skip to main content

reddb_server/storage/import/
sqlite.rs

1use std::fs::File;
2use std::io::{self, Read, Seek, SeekFrom};
3use std::path::Path;
4
/// Errors produced while opening or reading a SQLite database file.
#[derive(Debug)]
pub enum SqliteError {
    /// An underlying file operation (open, seek, read) failed.
    Io(io::Error),
    /// The file is not a SQLite 3 database or its contents are malformed.
    InvalidFormat,
    /// A page carried a page-type byte that the reader does not accept.
    InvalidPageType(u8),
    /// No table with the given name exists in the schema table.
    TableNotFound(String),
    /// The file uses a feature that this reader does not implement.
    UnsupportedFeature(String),
}

impl std::fmt::Display for SqliteError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            SqliteError::Io(err) => write!(f, "I/O error: {}", err),
            SqliteError::InvalidFormat => write!(f, "invalid SQLite file format"),
            SqliteError::InvalidPageType(page_type) => {
                write!(f, "invalid page type: 0x{:02x}", page_type)
            }
            SqliteError::TableNotFound(name) => write!(f, "table not found: {}", name),
            SqliteError::UnsupportedFeature(what) => write!(f, "unsupported feature: {}", what),
        }
    }
}

impl std::error::Error for SqliteError {
    /// Exposes the wrapped [`io::Error`] as the cause; other variants have no cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            SqliteError::Io(err) => Some(err),
            SqliteError::InvalidFormat
            | SqliteError::InvalidPageType(_)
            | SqliteError::TableNotFound(_)
            | SqliteError::UnsupportedFeature(_) => None,
        }
    }
}
13
14impl From<io::Error> for SqliteError {
15    fn from(e: io::Error) -> Self {
16        Self::Io(e)
17    }
18}
19
/// Read-only access to a SQLite 3 database file, parsed directly from its
/// on-disk pages.
pub struct SqliteReader {
    /// Handle to the database file; pages are fetched from it by seeking.
    file: File,
    /// Size of one database page in bytes, as decoded from the file header.
    page_size: u32,
}
24
25#[derive(Debug, Clone)]
26pub struct SqliteValue {
27    pub data: Vec<u8>,
28    pub data_type: SqliteType,
29}
30
31impl SqliteValue {
32    pub fn as_string(&self) -> Option<String> {
33        String::from_utf8(self.data.clone()).ok()
34    }
35}
36
/// Storage class of a [`SqliteValue`], derived from the record's serial type.
///
/// The enum is a plain tag without payload, so it is `Copy` and can be used
/// as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SqliteType {
    /// SQL NULL (also used for serial types the reader does not recognise).
    Null,
    /// Signed integer.
    Integer,
    /// 8-byte floating point number.
    Float,
    /// Text string.
    Text,
    /// Arbitrary binary data.
    Blob,
}
45
46impl SqliteReader {
47    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, SqliteError> {
48        let mut file = File::open(path)?;
49
50        // Read header (100 bytes)
51        let mut header = [0u8; 100];
52        if file.read(&mut header)? != 100 {
53            return Err(SqliteError::InvalidFormat);
54        }
55
56        // Check magic
57        if &header[0..16] != b"SQLite format 3\0" {
58            return Err(SqliteError::InvalidFormat);
59        }
60
61        // Page size at offset 16 (BE)
62        let page_size_be = u16::from_be_bytes([header[16], header[17]]);
63        let page_size = if page_size_be == 1 {
64            65536
65        } else {
66            page_size_be as u32
67        };
68
69        Ok(Self { file, page_size })
70    }
71
72    /// Read a generic page
73    fn read_page(&mut self, page_id: u32) -> Result<Vec<u8>, SqliteError> {
74        let offset = (page_id as u64 - 1) * self.page_size as u64;
75        self.file.seek(SeekFrom::Start(offset))?;
76
77        let mut buf = vec![0u8; self.page_size as usize];
78        self.file.read_exact(&mut buf)?;
79        Ok(buf)
80    }
81
82    /// Read a varint (1-9 bytes)
83    fn read_varint(data: &[u8], pos: &mut usize) -> u64 {
84        let mut result = 0u64;
85        for _i in 0..8 {
86            if *pos >= data.len() {
87                return result;
88            }
89            let byte = data[*pos];
90            *pos += 1;
91            result = (result << 7) | ((byte & 0x7F) as u64);
92            if (byte & 0x80) == 0 {
93                return result;
94            }
95        }
96        // 9th byte uses all 8 bits
97        if *pos < data.len() {
98            let byte = data[*pos];
99            *pos += 1;
100            result = (result << 8) | (byte as u64);
101        }
102        result
103    }
104
105    /// Parse a record from cell content
106    fn parse_record(data: &[u8]) -> Result<Vec<SqliteValue>, SqliteError> {
107        let mut pos = 0;
108        let _header_len = Self::read_varint(data, &mut pos);
109
110        // Read serial types until we reach the end of header
111        // Wait, header_len includes the size varint itself.
112        // Let's verify: "The header begins with a single varint which determines the total number of bytes in the header. The varint value is the size of the header in bytes including the size varint itself."
113
114        let header_start = 0;
115        // We already read header_len varint. We need to know how many bytes it took.
116        // Let's restart to be precise.
117        pos = 0;
118        let header_len = Self::read_varint(data, &mut pos) as usize;
119        let header_end = header_start + header_len;
120
121        let mut serial_types = Vec::new();
122        while pos < header_end {
123            serial_types.push(Self::read_varint(data, &mut pos));
124        }
125
126        let mut values = Vec::new();
127
128        for type_code in serial_types {
129            let (len, type_enum) = match type_code {
130                0 => (0, SqliteType::Null),
131                1 => (1, SqliteType::Integer), // 8-bit
132                2 => (2, SqliteType::Integer), // 16-bit
133                3 => (3, SqliteType::Integer), // 24-bit
134                4 => (4, SqliteType::Integer), // 32-bit
135                5 => (6, SqliteType::Integer), // 48-bit
136                6 => (8, SqliteType::Integer), // 64-bit
137                7 => (8, SqliteType::Float),
138                8 => (0, SqliteType::Integer), // 0
139                9 => (0, SqliteType::Integer), // 1
140                n if n >= 12 && n % 2 == 0 => (((n - 12) / 2) as usize, SqliteType::Blob),
141                n if n >= 13 && n % 2 == 1 => (((n - 13) / 2) as usize, SqliteType::Text),
142                _ => (0, SqliteType::Null), // Reserved/Internal
143            };
144
145            let val_data = if len > 0 {
146                if pos + len > data.len() {
147                    return Err(SqliteError::InvalidFormat); // Truncated
148                }
149                let d = data[pos..pos + len].to_vec();
150                pos += len;
151                d
152            } else {
153                Vec::new()
154            };
155
156            values.push(SqliteValue {
157                data: val_data,
158                data_type: type_enum,
159            });
160        }
161
162        Ok(values)
163    }
164
165    /// Scan a table for all records
166    /// Note: This is a simplified scanner that assumes the table is a B-Tree Leaf or Interior.
167    /// It traverses the tree.
168    pub fn scan_table(&mut self, root_page: u32) -> Result<Vec<Vec<SqliteValue>>, SqliteError> {
169        let mut records = Vec::new();
170        let mut queue = vec![root_page];
171
172        while let Some(page_id) = queue.pop() {
173            let raw_page = self.read_page(page_id)?;
174            let page = &raw_page;
175
176            // Header offset: 0 unless it's page 1, then 100
177            let header_offset = if page_id == 1 { 100 } else { 0 };
178
179            if page.len() < header_offset + 8 {
180                continue;
181            }
182
183            let page_type = page[header_offset];
184            let cell_count =
185                u16::from_be_bytes([page[header_offset + 3], page[header_offset + 4]]) as usize;
186
187            let cell_arr_start = header_offset + 8; // Page 1 header logic is tricky, usually handled by offset
188
189            // Logic for Leaf Table (0x0D) and Interior Table (0x05)
190            match page_type {
191                0x0D => {
192                    // Leaf Table
193                    for i in 0..cell_count {
194                        let ptr_offset = cell_arr_start + (i * 2);
195                        let cell_ptr =
196                            u16::from_be_bytes([page[ptr_offset], page[ptr_offset + 1]]) as usize;
197                        if cell_ptr >= page.len() {
198                            continue;
199                        }
200
201                        // Parse cell
202                        let mut pos = cell_ptr;
203                        let _payload_len = Self::read_varint(page, &mut pos);
204                        let _row_id = Self::read_varint(page, &mut pos);
205
206                        // remaining is payload
207                        // Note: If payload is large, it spills to overflow pages.
208                        // Simplified: We assume payload fits or we just read what's there (might be truncated).
209                        // Chrome logins are small, usually fit.
210
211                        // To handle overflow properly requires reading (payload_len) bytes.
212                        // For now let's pass the slice from pos to end, parse_record handles header length.
213
214                        if pos < page.len() {
215                            if let Ok(record) = Self::parse_record(&page[pos..]) {
216                                records.push(record);
217                            }
218                        }
219                    }
220                }
221                0x05 => {
222                    // Interior Table
223                    // Iterate cells to find child pages
224                    for i in 0..cell_count {
225                        let ptr_offset = cell_arr_start + (i * 2);
226                        let cell_ptr =
227                            u16::from_be_bytes([page[ptr_offset], page[ptr_offset + 1]]) as usize;
228
229                        let pos = cell_ptr;
230                        let left_child = u32::from_be_bytes([
231                            page[pos],
232                            page[pos + 1],
233                            page[pos + 2],
234                            page[pos + 3],
235                        ]);
236                        queue.push(left_child);
237
238                        // Key (rowid) follows, but we don't need it for full scan
239                    }
240                    // Right-most child
241                    let right_child = u32::from_be_bytes([
242                        page[header_offset + 8],
243                        page[header_offset + 9],
244                        page[header_offset + 10],
245                        page[header_offset + 11],
246                    ]);
247                    queue.push(right_child);
248                }
249                _ => {} // Ignore index pages etc
250            }
251        }
252
253        Ok(records)
254    }
255
256    /// Find root page of a table by name
257    pub fn find_table_root(&mut self, name: &str) -> Result<u32, SqliteError> {
258        // Scan sqlite_schema (page 1)
259        // Note: sqlite_schema is a table rooted at page 1.
260        let rows = self.scan_table(1)?;
261
262        for row in rows {
263            // Schema: type, name, tbl_name, rootpage, sql
264            if row.len() >= 4 {
265                if let Some(type_str) = row[0].as_string() {
266                    if type_str == "table" {
267                        if let Some(tbl_name) = row[1].as_string() {
268                            if tbl_name == name {
269                                // rootpage is 4th column (index 3), usually Integer
270                                if let SqliteType::Integer = row[3].data_type {
271                                    // Parse integer manually from LE/BE/Varint? No, parse_record returns raw data
272                                    // based on type.
273                                    // Wait, parse_record implementation for Integer:
274                                    // 1 byte: 8-bit, 2: 16-bit, etc.
275                                    // I need a helper to cast data to u32
276                                    return Ok(Self::parse_int(&row[3].data) as u32);
277                                }
278                            }
279                        }
280                    }
281                }
282            }
283        }
284
285        Err(SqliteError::TableNotFound(name.to_string()))
286    }
287
288    fn parse_int(data: &[u8]) -> i64 {
289        let mut val = 0i64;
290        for &b in data {
291            val = (val << 8) | (b as i64);
292        }
293        val
294    }
295}