// chat4n6_sqlite_forensics/wal.rs

use crate::btree::parse_table_leaf_page;
use crate::record::RecoveredRecord;
use chat4n6_plugin_api::{EvidenceSource, WalDelta, WalDeltaStatus};
use std::collections::BTreeMap;
5
/// First accepted WAL magic number, stored big-endian in bytes 0..4 of the file.
/// Per the SQLite file-format documentation this variant marks little-endian
/// frame checksums.
pub const WAL_MAGIC_1: u32 = 0x377f_0682;
/// Second accepted WAL magic number (same position as [`WAL_MAGIC_1`]).
/// Per the SQLite file-format documentation this variant marks big-endian
/// frame checksums.
pub const WAL_MAGIC_2: u32 = 0x377f_0683;
/// Size in bytes of the WAL file header that precedes the first frame.
pub const WAL_HEADER_SIZE: usize = 32;
/// Size in bytes of the header that precedes each frame's page data.
pub const WAL_FRAME_HEADER_SIZE: usize = 24;
10
11pub fn is_wal_header(data: &[u8]) -> bool {
12    if data.len() < 4 {
13        return false;
14    }
15    let magic = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
16    magic == WAL_MAGIC_1 || magic == WAL_MAGIC_2
17}
18
19pub fn wal_frame_offset(frame_index: usize, page_size: u32) -> u64 {
20    (WAL_HEADER_SIZE + frame_index * (WAL_FRAME_HEADER_SIZE + page_size as usize)) as u64
21}
22
23#[derive(Debug)]
24pub struct WalHeader {
25    pub page_size: u32,
26    pub checkpoint_seq: u32,
27    pub salt1: u32,
28    pub salt2: u32,
29}
30
31impl WalHeader {
32    pub fn parse(data: &[u8]) -> Option<Self> {
33        if data.len() < 32 || !is_wal_header(data) {
34            return None;
35        }
36        Some(Self {
37            page_size: u32::from_be_bytes([data[8], data[9], data[10], data[11]]),
38            checkpoint_seq: u32::from_be_bytes([data[12], data[13], data[14], data[15]]),
39            salt1: u32::from_be_bytes([data[16], data[17], data[18], data[19]]),
40            salt2: u32::from_be_bytes([data[20], data[21], data[22], data[23]]),
41        })
42    }
43}
44
/// Header fields of one WAL frame plus the location of its page image.
///
/// The page bytes themselves are not copied; `page_data_offset` points into
/// the WAL byte slice the frame was parsed from.
#[derive(Debug, Clone)]
pub struct WalFrame {
    /// Database page number this frame holds an image of.
    pub page_number: u32,
    /// Non-zero means this is a commit frame.
    pub db_size_after_commit: u32,
    /// First salt value copied from the frame header.
    pub salt1: u32,
    /// Second salt value copied from the frame header.
    pub salt2: u32,
    /// Byte offset of the page data within the WAL byte slice.
    pub page_data_offset: usize,
}
55
56/// Parse all frames from a WAL file, grouped by salt1 (transaction identifier).
57/// Returns BTreeMap<salt1, Vec<WalFrame>> preserving file order within each group.
58///
59/// Note: salt1 values are random — BTreeMap ordering by key does NOT imply
60/// time ordering. Use file position (frame index) to determine recency.
61pub fn parse_wal_frames(wal: &[u8], page_size: u32) -> BTreeMap<u32, Vec<WalFrame>> {
62    let mut map: BTreeMap<u32, Vec<WalFrame>> = BTreeMap::new();
63    if !is_wal_header(wal) {
64        return map;
65    }
66    let frame_size = WAL_FRAME_HEADER_SIZE + page_size as usize;
67    let mut idx = 0;
68    loop {
69        let frame_off = WAL_HEADER_SIZE + idx * frame_size;
70        if frame_off + WAL_FRAME_HEADER_SIZE > wal.len() {
71            break;
72        }
73        let fh = &wal[frame_off..frame_off + WAL_FRAME_HEADER_SIZE];
74        let page_number = u32::from_be_bytes([fh[0], fh[1], fh[2], fh[3]]);
75        let db_size = u32::from_be_bytes([fh[4], fh[5], fh[6], fh[7]]);
76        let salt1 = u32::from_be_bytes([fh[8], fh[9], fh[10], fh[11]]);
77        let salt2 = u32::from_be_bytes([fh[12], fh[13], fh[14], fh[15]]);
78        if page_number == 0 {
79            break;
80        }
81        let page_data_end = frame_off + frame_size;
82        if page_data_end > wal.len() {
83            break;
84        }
85        map.entry(salt1).or_default().push(WalFrame {
86            page_number,
87            db_size_after_commit: db_size,
88            salt1,
89            salt2,
90            page_data_offset: frame_off + WAL_FRAME_HEADER_SIZE,
91        });
92        idx += 1;
93    }
94    map
95}
96
97/// Layer 2: extract records from WAL frames that haven't been checkpointed to main DB.
98///
99/// Processes ALL WAL frames (all salt groups, in file order). A frame is considered
100/// unapplied if its page content differs from the corresponding main DB page.
101/// Tags records as `EvidenceSource::WalPending`.
102pub fn recover_layer2(
103    wal: &[u8],
104    db: &[u8],
105    page_size: u32,
106    table_name: &str,
107) -> Vec<RecoveredRecord> {
108    let mut records = Vec::new();
109    let frames = parse_wal_frames(wal, page_size);
110
111    // Process all salt groups in file order (BTreeMap iteration is by salt1 key,
112    // but within each group frames are in file order). We process all groups
113    // because forensic recovery must not discard any session's data.
114    for frame_group in frames.values() {
115        for frame in frame_group {
116            let wal_page = match wal
117                .get(frame.page_data_offset..frame.page_data_offset + page_size as usize)
118            {
119                Some(p) => p,
120                None => continue,
121            };
122            // Frame is "pending" (unapplied) if the main DB page differs from WAL page
123            let db_offset = (frame.page_number as usize - 1) * page_size as usize;
124            let db_page = db.get(db_offset..db_offset + page_size as usize);
125            if db_page == Some(wal_page) {
126                continue; // already checkpointed
127            }
128            let bhdr = if frame.page_number == 1 { 100 } else { 0 };
129            let mut page_records =
130                parse_table_leaf_page(wal_page, bhdr, frame.page_number, page_size, table_name);
131            for r in &mut page_records {
132                r.source = EvidenceSource::WalPending;
133            }
134            records.extend(page_records);
135        }
136    }
137    records
138}
139
140/// Layer 3: compare WAL pages against main DB to detect row-level changes.
141///
142/// For each database page that appears in the WAL, compares the WAL version
143/// against the main DB version. Produces `WalDelta` entries tagged as
144/// AddedInWal / DeletedInWal / ModifiedInWal.
145///
146/// Deduplication: for each (table, row_id), only the **last-written** delta
147/// (by file position) is retained, preventing contradictory entries when
148/// a row is modified across multiple WAL sessions.
149pub fn recover_layer3_deltas(
150    wal: &[u8],
151    db: &[u8],
152    page_size: u32,
153    table_name: &str,
154) -> Vec<WalDelta> {
155    use std::collections::HashMap;
156
157    // Use a HashMap keyed by row_id to keep only the last-seen delta per row.
158    // BTreeMap iteration order is by salt1 value (not file position), so we
159    // process frames in the order they appear within each group (file order)
160    // but accept that cross-group ordering may not be strictly chronological.
161    // For forensic purposes this is acceptable — we expose all differences.
162    let mut seen: HashMap<i64, WalDeltaStatus> = HashMap::new();
163
164    let frames = parse_wal_frames(wal, page_size);
165    for frame_group in frames.values() {
166        for frame in frame_group {
167            let wal_page = match wal
168                .get(frame.page_data_offset..frame.page_data_offset + page_size as usize)
169            {
170                Some(p) => p,
171                None => continue,
172            };
173            let db_offset = (frame.page_number as usize - 1) * page_size as usize;
174            let bhdr = if frame.page_number == 1 { 100 } else { 0 };
175
176            let db_page = match db.get(db_offset..db_offset + page_size as usize) {
177                Some(p) => p,
178                None => {
179                    // Page absent in main DB — all WAL rows are additions
180                    let wal_records = parse_table_leaf_page(
181                        wal_page,
182                        bhdr,
183                        frame.page_number,
184                        page_size,
185                        table_name,
186                    );
187                    for r in wal_records {
188                        if let Some(row_id) = r.row_id {
189                            seen.insert(row_id, WalDeltaStatus::AddedInWal);
190                        }
191                    }
192                    continue;
193                }
194            };
195            if wal_page == db_page {
196                continue;
197            }
198
199            let wal_records =
200                parse_table_leaf_page(wal_page, bhdr, frame.page_number, page_size, table_name);
201            let db_records =
202                parse_table_leaf_page(db_page, bhdr, frame.page_number, page_size, table_name);
203
204            let wal_ids: HashMap<i64, _> = wal_records
205                .iter()
206                .filter_map(|r| r.row_id.map(|id| (id, &r.values)))
207                .collect();
208            let db_ids: HashMap<i64, _> = db_records
209                .iter()
210                .filter_map(|r| r.row_id.map(|id| (id, &r.values)))
211                .collect();
212
213            for &id in wal_ids.keys() {
214                if !db_ids.contains_key(&id) {
215                    seen.insert(id, WalDeltaStatus::AddedInWal);
216                }
217            }
218            for &id in db_ids.keys() {
219                if !wal_ids.contains_key(&id) {
220                    seen.insert(id, WalDeltaStatus::DeletedInWal);
221                }
222            }
223            for (&id, wal_vals) in &wal_ids {
224                if let Some(db_vals) = db_ids.get(&id) {
225                    if wal_vals != db_vals {
226                        seen.insert(id, WalDeltaStatus::ModifiedInWal);
227                    }
228                }
229            }
230        }
231    }
232
233    seen.into_iter()
234        .map(|(row_id, status)| WalDelta {
235            table: table_name.to_string(),
236            row_id,
237            status,
238        })
239        .collect()
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Both magic variants are accepted; all-zero and truncated input are rejected.
    #[test]
    fn test_wal_magic_detection() {
        assert!(is_wal_header(&0x377f0682u32.to_be_bytes()));
        assert!(is_wal_header(&0x377f0683u32.to_be_bytes()));
        assert!(!is_wal_header(b"\x00\x00\x00\x00"));
        // Fewer than four bytes can never carry the magic.
        assert!(!is_wal_header(&[0x37, 0x7f, 0x06]));
        assert!(!is_wal_header(&[]));
    }

    /// Frame 0 starts right after the 32-byte file header; each further frame
    /// is one frame header plus one page later.
    #[test]
    fn test_wal_frame_offset_calculation() {
        let page_size = 4096u32;
        assert_eq!(wal_frame_offset(0, page_size), 32);
        assert_eq!(wal_frame_offset(1, page_size), 32 + 24 + 4096);
    }

    /// Header fields are read big-endian from their fixed offsets.
    #[test]
    fn test_parse_wal_header() {
        let mut header = vec![0u8; 32];
        let fields: [(usize, u32); 6] = [
            (0, 0x377f0682), // magic
            (4, 3007000),    // file format version (not parsed)
            (8, 4096),       // page size
            (12, 7),         // checkpoint_seq
            (16, 42),        // salt1
            (20, 99),        // salt2
        ];
        for &(at, value) in fields.iter() {
            header[at..at + 4].copy_from_slice(&value.to_be_bytes());
        }

        let wh = WalHeader::parse(&header).unwrap();
        assert_eq!(wh.page_size, 4096);
        assert_eq!(wh.checkpoint_seq, 7);
        assert_eq!(wh.salt1, 42);
        assert_eq!(wh.salt2, 99);

        // One byte short of a full header must be rejected.
        assert!(WalHeader::parse(&header[..31]).is_none());
    }
}
280
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// Builds an in-memory WAL image for tests.
    ///
    /// `frames` entries are `(page_number, db_size_after_commit, salt1, page_data)`.
    /// The file header carries `WAL_MAGIC_1` and `page_size`; all other header
    /// bytes, the frame salt2 and the frame checksums are left zero. Page data
    /// is truncated or zero-padded to exactly `page_size` bytes.
    fn make_wal_bytes(page_size: u32, frames: &[(u32, u32, u32, &[u8])]) -> Vec<u8> {
        let page_len = page_size as usize;
        let mut wal =
            Vec::with_capacity(WAL_HEADER_SIZE + frames.len() * (WAL_FRAME_HEADER_SIZE + page_len));

        // File header: magic at 0..4, page size at 8..12, everything else zero.
        wal.extend_from_slice(&WAL_MAGIC_1.to_be_bytes());
        wal.extend_from_slice(&[0u8; 4]);
        wal.extend_from_slice(&page_size.to_be_bytes());
        wal.resize(WAL_HEADER_SIZE, 0);

        for &(page_number, db_size, salt1, page_data) in frames {
            let frame_start = wal.len();
            // Frame header: page number, commit size, salt1, then zero fill.
            wal.extend_from_slice(&page_number.to_be_bytes());
            wal.extend_from_slice(&db_size.to_be_bytes());
            wal.extend_from_slice(&salt1.to_be_bytes());
            wal.resize(frame_start + WAL_FRAME_HEADER_SIZE, 0);

            // Page image, clipped or zero-padded to one full page.
            let used = page_data.len().min(page_len);
            wal.extend_from_slice(&page_data[..used]);
            wal.resize(frame_start + WAL_FRAME_HEADER_SIZE + page_len, 0);
        }
        wal
    }

    #[test]
    fn test_parse_wal_frames_single_frame() {
        let page_size = 4096u32;
        let page_data = vec![0xABu8; page_size as usize];
        let wal = make_wal_bytes(page_size, &[(2, 1, 42, page_data.as_slice())]);

        let frames = parse_wal_frames(&wal, page_size);
        assert_eq!(frames.len(), 1);

        let group = frames.get(&42).unwrap();
        assert_eq!(group.len(), 1);
        assert_eq!(group[0].page_number, 2);
        assert_eq!(group[0].salt1, 42);
        assert_eq!(group[0].db_size_after_commit, 1);
        // Page data starts right after the file header and the frame header.
        assert_eq!(
            group[0].page_data_offset,
            WAL_HEADER_SIZE + WAL_FRAME_HEADER_SIZE
        );
    }

    #[test]
    fn test_parse_wal_frames_empty_wal() {
        assert!(parse_wal_frames(&[], 4096).is_empty());
    }

    #[test]
    fn test_parse_wal_frames_groups_by_salt1() {
        let page_size = 4096u32;
        let pd = vec![0u8; page_size as usize];
        let wal = make_wal_bytes(
            page_size,
            &[
                (1, 0, 100, pd.as_slice()),
                (2, 1, 100, pd.as_slice()),
                (3, 1, 200, pd.as_slice()),
            ],
        );

        let frames = parse_wal_frames(&wal, page_size);
        let first = frames.get(&100).unwrap();
        assert_eq!(first.len(), 2);
        // File order is preserved inside a group.
        assert_eq!(first[0].page_number, 1);
        assert_eq!(first[1].page_number, 2);
        assert_eq!(frames.get(&200).unwrap().len(), 1);
    }
}