chat4n6_sqlite_forensics/
unalloc.rs1use crate::record::{decode_serial_type, RecoveredRecord, SqlValue};
2use crate::varint::read_varint;
3use chat4n6_plugin_api::{EvidenceSource, UnallocatedRegion};
4use std::collections::HashMap;
5
/// The serial-type signature learned for one column of one table.
///
/// Instances are produced by [`learn_signatures`] and handed out by
/// [`SignatureDb::patterns_for`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnPattern {
    /// Name of the table the column belongs to.
    pub table: String,
    /// Zero-based position of the column within a record's value list.
    pub col_index: usize,
    /// Serial type observed most often for this column in the training records.
    pub serial_type_hint: u64,
}
16
17pub struct SignatureDb {
19 patterns: Vec<ColumnPattern>,
20}
21
22impl SignatureDb {
23 pub fn patterns_for(&self, table: &str) -> Vec<&ColumnPattern> {
25 let mut v: Vec<&ColumnPattern> =
26 self.patterns.iter().filter(|p| p.table == table).collect();
27 v.sort_by_key(|p| p.col_index);
28 v
29 }
30}
31
32pub fn learn_signatures(records: &[RecoveredRecord]) -> SignatureDb {
36 let mut counts: HashMap<(String, usize), HashMap<u64, usize>> = HashMap::new();
38
39 for rec in records {
40 for (col_index, value) in rec.values.iter().enumerate() {
41 let st = value_to_serial_type(value);
42 *counts
43 .entry((rec.table.clone(), col_index))
44 .or_default()
45 .entry(st)
46 .or_insert(0) += 1;
47 }
48 }
49
50 let mut patterns: Vec<ColumnPattern> = counts
51 .into_iter()
52 .map(|((table, col_index), type_counts)| {
53 let serial_type_hint = type_counts
54 .into_iter()
55 .max_by_key(|&(_, c)| c)
56 .map(|(st, _)| st)
57 .unwrap_or(0);
58 ColumnPattern {
59 table,
60 col_index,
61 serial_type_hint,
62 }
63 })
64 .collect();
65
66 patterns.sort_by(|a, b| a.table.cmp(&b.table).then(a.col_index.cmp(&b.col_index)));
68
69 SignatureDb { patterns }
70}
71
72fn value_to_serial_type(value: &SqlValue) -> u64 {
74 match value {
75 SqlValue::Null => 0,
76 SqlValue::Int(v) => {
77 if *v == 0 || *v == 1 {
79 8 } else if (-128..=127).contains(v) {
81 1
82 } else if (-32768..=32767).contains(v) {
83 2
84 } else if (-8388608..=8388607).contains(v) {
85 3
86 } else if (-2147483648..=2147483647).contains(v) {
87 4
88 } else {
89 6
90 }
91 }
92 SqlValue::Real(_) => 7,
93 SqlValue::Text(s) => {
94 let len = s.len() as u64;
95 13 + len * 2
96 }
97 SqlValue::Blob(b) => {
98 let len = b.len() as u64;
99 12 + len * 2
100 }
101 }
102}
103
104pub fn carve_unallocated(
109 region: &UnallocatedRegion,
110 sig_db: &SignatureDb,
111 table_hint: &str,
112) -> Vec<RecoveredRecord> {
113 let data = ®ion.data;
114 let abs_base = region.offset;
115 let patterns = sig_db.patterns_for(table_hint);
116
117 let mut results = Vec::new();
118
119 if data.is_empty() || patterns.is_empty() {
120 return results;
123 }
124
125 let col_count = patterns.iter().map(|p| p.col_index + 1).max().unwrap_or(0);
126
127 if col_count == 0 {
128 return results;
129 }
130
131 let min_len = 1 + col_count;
133
134 let mut pos = 0;
135 while pos + min_len <= data.len() {
136 if let Some((record, consumed)) = try_parse_record(
137 &data[pos..],
138 abs_base + pos as u64,
139 table_hint,
140 col_count,
141 &patterns,
142 ) {
143 results.push(record);
144 pos += consumed;
145 } else {
146 pos += 1;
147 }
148 }
149
150 results
151}
152
153fn try_parse_record(
156 data: &[u8],
157 abs_offset: u64,
158 table: &str,
159 col_count: usize,
160 patterns: &[&ColumnPattern],
161) -> Option<(RecoveredRecord, usize)> {
162 let (header_len, hl_consumed) = read_varint(data, 0)?;
164 let header_end = header_len as usize;
165
166 if header_end < hl_consumed || header_end > data.len() || header_end > 512
169 {
171 return None;
172 }
173
174 let mut pos = hl_consumed;
176 let mut serial_types: Vec<u64> = Vec::with_capacity(col_count);
177 while pos < header_end {
178 let (st, consumed) = read_varint(data, pos)?;
179 serial_types.push(st);
180 pos += consumed;
181 if serial_types.len() >= col_count {
182 break;
183 }
184 }
185
186 if serial_types.is_empty() {
187 return None;
188 }
189
190 let matched = serial_types
192 .iter()
193 .enumerate()
194 .filter(|&(i, &st)| {
195 patterns
196 .iter()
197 .any(|p| p.col_index == i && serial_types_compatible(st, p.serial_type_hint))
198 })
199 .count();
200
201 if matched == 0 {
202 return None;
203 }
204
205 let mut data_pos = header_end;
208 let mut values: Vec<SqlValue> = Vec::with_capacity(serial_types.len());
209 for &st in &serial_types {
210 match decode_serial_type(st, data, data_pos) {
211 Some((val, consumed)) => {
212 data_pos += consumed;
213 values.push(val);
214 }
215 None => return None,
216 }
217 }
218
219 let total_cols = serial_types.len().max(col_count);
220 let confidence_pct = ((matched * 100) / total_cols.max(1)).min(100) as u8;
221 let confidence = confidence_pct as f32 / 100.0;
222
223 Some((
224 RecoveredRecord {
225 table: table.to_string(),
226 row_id: None,
227 values,
228 source: EvidenceSource::CarvedUnalloc { confidence_pct },
229 offset: abs_offset,
230 confidence,
231 },
232 data_pos,
233 ))
234}
235
/// Reports whether a serial type found in a candidate header is acceptable for
/// a column whose learned hint is `hint`.
///
/// Identical serial types always match. Otherwise both must fall in the same
/// storage class: integer (1..=6, 8, 9), text (odd, >= 13) or blob (even,
/// >= 12). Every other serial type only matches itself.
fn serial_types_compatible(found: u64, hint: u64) -> bool {
    #[derive(PartialEq)]
    enum Class {
        Int,
        Text,
        Blob,
    }

    // Storage class of a serial type; `None` for types that only match exactly.
    fn class_of(st: u64) -> Option<Class> {
        match st {
            1..=6 | 8 | 9 => Some(Class::Int),
            n if n >= 13 && n & 1 == 1 => Some(Class::Text),
            n if n >= 12 && n & 1 == 0 => Some(Class::Blob),
            _ => None,
        }
    }

    if found == hint {
        return true;
    }

    match (class_of(found), class_of(hint)) {
        (Some(a), Some(b)) => a == b,
        _ => false,
    }
}
262
263pub fn recover_layer6(
267 regions: &[UnallocatedRegion],
268 sig_db: &SignatureDb,
269 table_hint: &str,
270) -> Vec<RecoveredRecord> {
271 regions
272 .iter()
273 .flat_map(|r| carve_unallocated(r, sig_db, table_hint))
274 .collect()
275}
276
#[cfg(test)]
mod tests {
    use super::*;
    use chat4n6_plugin_api::EvidenceSource;

    /// Builds a live, full-confidence record of `table` holding `values`.
    fn make_record(table: &str, values: Vec<SqlValue>) -> RecoveredRecord {
        RecoveredRecord {
            table: table.to_string(),
            row_id: None,
            values,
            source: EvidenceSource::Live,
            offset: 0,
            confidence: 1.0,
        }
    }

    /// Wraps raw bytes in an unallocated region that starts at `offset`.
    fn region_at(offset: u64, data: Vec<u8>) -> UnallocatedRegion {
        UnallocatedRegion { offset, data }
    }

    /// Learning from no records produces no patterns.
    #[test]
    fn test_learn_signatures_empty() {
        let db = learn_signatures(&[]);
        assert!(
            db.patterns.is_empty(),
            "Empty records should yield empty signature db"
        );
    }

    /// An (int, text) table yields an int hint for column 0 and a text hint for column 1.
    #[test]
    fn test_learn_signatures_basic() {
        let rows = [(1, "hello"), (2, "world"), (3, "foo")];
        let records: Vec<RecoveredRecord> = rows
            .iter()
            .map(|&(id, body)| {
                make_record(
                    "messages",
                    vec![SqlValue::Int(id), SqlValue::Text(body.into())],
                )
            })
            .collect();

        let db = learn_signatures(&records);
        let pats = db.patterns_for("messages");
        assert_eq!(
            pats.len(),
            2,
            "Should have 2 column patterns for 'messages'"
        );

        let col0 = pats
            .iter()
            .find(|p| p.col_index == 0)
            .expect("col 0 pattern");
        assert!(
            matches!(col0.serial_type_hint, 1..=6 | 8 | 9),
            "col 0 hint should be an int type, got {}",
            col0.serial_type_hint
        );

        let col1 = pats
            .iter()
            .find(|p| p.col_index == 1)
            .expect("col 1 pattern");
        assert!(
            col1.serial_type_hint >= 13 && col1.serial_type_hint % 2 == 1,
            "col 1 hint should be text serial type, got {}",
            col1.serial_type_hint
        );
    }

    /// A single-column integer record embedded in a region is recovered.
    #[test]
    fn test_carve_unallocated_finds_record() {
        let sig_db = learn_signatures(&[make_record("test_table", vec![SqlValue::Int(99)])]);

        // Header length 2, serial type 1 (one-byte int), value 0x2a = 42.
        let region = region_at(1000, vec![0x02, 0x01, 0x2a]);

        let found = carve_unallocated(&region, &sig_db, "test_table");
        assert!(!found.is_empty(), "Should find the embedded record");

        let first = &found[0];
        assert!(first.values.contains(&SqlValue::Int(42)));
        assert_eq!(first.offset, 1000);
        assert!(first.confidence > 0.0);
        assert!(matches!(
            first.source,
            EvidenceSource::CarvedUnalloc { confidence_pct } if confidence_pct > 0
        ));
    }

    /// A region without data yields nothing.
    #[test]
    fn test_carve_unallocated_empty_region() {
        let sig_db = learn_signatures(&[make_record("test_table", vec![SqlValue::Int(1)])]);
        let region = region_at(0, Vec::new());

        let found = carve_unallocated(&region, &sig_db, "test_table");
        assert!(found.is_empty(), "Empty region should yield no results");
    }

    /// Records carved from separate regions are all returned, with distinct offsets.
    #[test]
    fn test_recover_layer6_combines_regions() {
        let sig_db = learn_signatures(&[make_record("tbl", vec![SqlValue::Int(0)])]);

        // Header length 2, serial type 1 (one-byte int), value 7.
        let record_bytes: Vec<u8> = vec![0x02, 0x01, 0x07];
        let regions = vec![
            region_at(0, record_bytes.clone()),
            region_at(5000, record_bytes),
        ];

        let results = recover_layer6(&regions, &sig_db, "tbl");
        assert_eq!(results.len(), 2, "Should combine records from both regions");
        assert_ne!(results[0].offset, results[1].offset);
    }
}