Skip to main content

ewf_forensic/
integrity.rs

1use flate2::read::ZlibDecoder;
2use md5::{Digest as _, Md5};
3use std::io::Read as _;
4
/// Magic bytes expected at offset 0 of the buffer: ASCII `EVF` followed by
/// `09 0d 0a ff 00`.
const EVF_SIGNATURE: [u8; 8] = *b"EVF\x09\x0d\x0a\xff\x00";

/// Length of the file header. Buffers shorter than this are rejected outright,
/// and the section chain is walked starting at this offset.
const FILE_HEADER_SIZE: usize = 13;

/// Length of one section descriptor: type name in bytes `0..16`, next-section
/// offset in `16..24`, section size in `24..32`, Adler-32 of bytes `0..72`
/// stored in `72..76`.
pub(crate) const SECTION_DESCRIPTOR_SIZE: usize = 76;

/// Number of volume-section body bytes that must be present before the
/// geometry fields (read from body offsets `4..24`) are parsed.
const VOLUME_DATA_MIN: usize = 24;
9
/// Section type strings recognised for EWF v1.
///
/// A section descriptor whose type name is not in this list is reported as an
/// unknown section type while the section chain is walked.
const KNOWN_TYPES: &[&str] = &[
    "header", "header2", "volume", "disk",
    "table", "table2", "sectors",
    "hash", "digest", "error2", "session",
    "done", "next",
    "data", "ltree", "ltreedata",
];
29
/// How serious an [`EwfIntegrityAnomaly`] is.
///
/// Variants are declared from least to most severe, so the derived ordering
/// lets callers filter or rank findings, e.g. `severity >= Severity::Warning`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Informational finding; not a defect by itself.
    Info,
    /// Suspicious or incomplete, but the image can still be interpreted.
    Warning,
    /// A structural or content check failed.
    Error,
    /// The most severe level, used for findings such as a bad signature or a
    /// broken section chain.
    Critical,
}
37
/// A single finding produced while checking an EWF segment buffer.
///
/// All offsets are absolute byte offsets into the analysed buffer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EwfIntegrityAnomaly {
    /// The first eight bytes do not match the EVF signature.
    InvalidSignature,
    /// The 16-bit segment number in the file header is zero.
    SegmentNumberZero,
    /// The Adler-32 computed over the first 72 descriptor bytes differs from
    /// the value stored in the descriptor.
    SectionDescriptorCrcMismatch {
        /// Offset of the section descriptor.
        offset: u64,
        /// Type name read from the descriptor.
        section_type: String,
        /// Checksum computed from the descriptor bytes.
        computed: u32,
        /// Checksum stored in the descriptor.
        stored: u32,
    },
    /// The section chain could not be followed: the next-section pointer is
    /// zero, lies beyond the end of the buffer, or does not move forward.
    /// Also reported (with both offsets zero) when the buffer is too short to
    /// hold a file header.
    SectionChainBroken {
        /// Offset of the descriptor holding the bad pointer.
        at_offset: u64,
        /// The offending next-section pointer.
        next_offset: u64,
    },
    /// Bytes between the declared end of a section and the start of the next
    /// one contain at least one non-zero byte.
    SectionGapNonZero {
        /// Offset where the gap begins (declared end of the section).
        gap_offset: u64,
        /// Length of the gap in bytes.
        gap_size: u64,
    },
    /// Neither a `volume` nor a `disk` section was found.
    VolumeSectionMissing,
    /// A descriptor carries a type name that is not a known section type.
    UnknownSectionType {
        /// Offset of the section descriptor.
        offset: u64,
        /// The unrecognised type name.
        type_name: String,
    },
    /// No `done` section was found in the section chain.
    DoneSectionMissing,
    /// `sectors_per_chunk` is zero or not a power of two.
    ChunkSizeInvalid {
        /// Sectors per chunk as declared in the volume section.
        sectors_per_chunk: u32,
        /// Bytes per sector as declared in the volume section.
        bytes_per_sector: u32,
    },
    /// The declared sector count does not fit the range implied by the chunk
    /// count and chunk size.
    SectorCountMismatch {
        /// Sector count stored in the volume section.
        declared: u64,
        /// Upper bound `chunk_count * sectors_per_chunk`.
        expected: u64,
    },
    /// `bytes_per_sector` is neither 512 nor 4096.
    BytesPerSectorInvalid {
        /// Bytes per sector as declared in the volume section.
        bytes_per_sector: u32,
    },
    /// The table's entry count differs from the volume's chunk count.
    TableChunkCountMismatch {
        /// Chunk count from the volume section.
        in_volume: u32,
        /// Entry count from the table section.
        in_table: u32,
    },
    /// A table entry resolves to an offset at or past the end of the buffer.
    TableEntryOutOfBounds {
        /// Index of the entry in the table.
        chunk_index: u32,
        /// Absolute offset the entry resolves to.
        entry_offset: u64,
        /// Size of the analysed buffer.
        file_size: u64,
    },
    /// A table entry resolves to an offset inside the buffer but outside the
    /// body of the `sectors` section.
    TableEntryOutsideSectorsRange {
        /// Index of the entry in the table.
        chunk_index: u32,
        /// Absolute offset the entry resolves to.
        entry_offset: u64,
        /// First byte of the `sectors` body.
        sectors_start: u64,
        /// One past the last byte of the `sectors` section.
        sectors_end: u64,
    },
    /// Bytes between the declared end of a section and the start of the next
    /// one exist but are all zero.
    SectionGapZero {
        /// Offset where the gap begins (declared end of the section).
        gap_offset: u64,
        /// Length of the gap in bytes.
        gap_size: u64,
    },
    /// The MD5 computed over the chunk data differs from the digest stored in
    /// the `hash` section.
    HashMismatch {
        /// Digest computed from the chunk data.
        computed: [u8; 16],
        /// Digest read from the `hash` section.
        stored: [u8; 16],
    },
    /// No `hash` section was found.
    HashSectionMissing,
}
98
99impl EwfIntegrityAnomaly {
100    pub fn severity(&self) -> Severity {
101        match self {
102            Self::InvalidSignature => Severity::Critical,
103            Self::SegmentNumberZero => Severity::Error,
104            Self::SectionDescriptorCrcMismatch { .. } => Severity::Error,
105            Self::SectionChainBroken { .. } => Severity::Critical,
106            Self::SectionGapNonZero { .. } => Severity::Warning,
107            Self::VolumeSectionMissing => Severity::Critical,
108            Self::UnknownSectionType { .. } => Severity::Warning,
109            Self::DoneSectionMissing => Severity::Warning,
110            Self::ChunkSizeInvalid { .. } => Severity::Error,
111            Self::SectorCountMismatch { .. } => Severity::Error,
112            Self::BytesPerSectorInvalid { .. } => Severity::Error,
113            Self::TableChunkCountMismatch { .. } => Severity::Error,
114            Self::TableEntryOutOfBounds { .. } => Severity::Error,
115            Self::TableEntryOutsideSectorsRange { .. } => Severity::Error,
116            Self::SectionGapZero { .. } => Severity::Info,
117            Self::HashMismatch { .. } => Severity::Error,
118            Self::HashSectionMissing => Severity::Warning,
119        }
120    }
121}
122
/// Borrowed view over the bytes of one EWF segment file, used to run the
/// integrity checks.
///
/// The type only holds a shared slice, so it is `Copy`. `Debug` is not derived
/// on purpose: it would print the entire image buffer.
#[derive(Clone, Copy)]
pub struct EwfIntegrity<'a> {
    /// Raw bytes of the segment file being analysed.
    data: &'a [u8],
}
126
127impl<'a> EwfIntegrity<'a> {
128    pub fn new(data: &'a [u8]) -> Self {
129        Self { data }
130    }
131
132    pub fn analyse(&self) -> Vec<EwfIntegrityAnomaly> {
133        let mut issues = Vec::new();
134        let data = self.data;
135        let file_size = data.len() as u64;
136
137        // ── Layer 1: File Header ──────────────────────────────────────────────
138        if data.len() < FILE_HEADER_SIZE {
139            issues.push(EwfIntegrityAnomaly::SectionChainBroken {
140                at_offset: 0,
141                next_offset: 0,
142            });
143            return issues;
144        }
145        if data[0..8] != EVF_SIGNATURE {
146            issues.push(EwfIntegrityAnomaly::InvalidSignature);
147        }
148        let segment_number = u16::from_le_bytes(data[9..11].try_into().unwrap());
149        if segment_number == 0 {
150            issues.push(EwfIntegrityAnomaly::SegmentNumberZero);
151        }
152
153        // ── Layer 2 & 3: Walk section chain ──────────────────────────────────
154        let sections = self.walk_sections(&mut issues);
155
156        // ── Layer 4: Volume geometry ──────────────────────────────────────────
157        let volume = sections
158            .iter()
159            .find(|s| s.type_name == "volume" || s.type_name == "disk");
160        let geometry: Option<VolumeGeometry> = match volume {
161            None => {
162                issues.push(EwfIntegrityAnomaly::VolumeSectionMissing);
163                None
164            }
165            Some(vol) => self.check_volume(vol.offset, &mut issues),
166        };
167
168        // ── Layer 5: Table integrity ──────────────────────────────────────────
169        let sectors_range = sections.iter().find(|s| s.type_name == "sectors").map(|s| {
170            let data_start = s.offset + SECTION_DESCRIPTOR_SIZE as u64;
171            let data_end = s.offset + s.size;
172            (data_start, data_end)
173        });
174        if let Some(table) = sections.iter().find(|s| s.type_name == "table") {
175            self.check_table(
176                table.offset,
177                geometry.as_ref().map(|g| g.chunk_count),
178                file_size,
179                sectors_range,
180                &mut issues,
181            );
182        }
183
184        // ── Layer 6: Done section present ────────────────────────────────────
185        if !sections.iter().any(|s| s.type_name == "done") {
186            issues.push(EwfIntegrityAnomaly::DoneSectionMissing);
187        }
188
189        // ── Layer 7: Hash verification ────────────────────────────────────────
190        self.check_hash(&sections, geometry.as_ref(), &mut issues);
191
192        issues
193    }
194
195    fn check_hash(
196        &self,
197        sections: &[Section],
198        geometry: Option<&VolumeGeometry>,
199        issues: &mut Vec<EwfIntegrityAnomaly>,
200    ) {
201        let data = self.data;
202
203        let hash_sec = match sections.iter().find(|s| s.type_name == "hash") {
204            Some(s) => s,
205            None => {
206                issues.push(EwfIntegrityAnomaly::HashSectionMissing);
207                return;
208            }
209        };
210
211        let sectors_sec = match sections.iter().find(|s| s.type_name == "sectors") {
212            Some(s) => s,
213            None => return,
214        };
215
216        let table_sec = match sections.iter().find(|s| s.type_name == "table") {
217            Some(s) => s,
218            None => return,
219        };
220
221        let geom = match geometry {
222            Some(g) if g.sectors_per_chunk > 0 && g.bytes_per_sector > 0 => g,
223            _ => return,
224        };
225
226        // Parse table header for entry_count and base_offset.
227        let tbl_data_start = (table_sec.offset as usize) + SECTION_DESCRIPTOR_SIZE;
228        if data.len() < tbl_data_start + 24 {
229            return;
230        }
231        let tbl = &data[tbl_data_start..];
232        let entry_count = u32::from_le_bytes(tbl[0..4].try_into().unwrap());
233        let base_offset = u64::from_le_bytes(tbl[8..16].try_into().unwrap());
234        let entries_start = tbl_data_start + 24;
235
236        // Sectors body end boundary used for the last chunk's compressed data.
237        let sectors_body_end = (sectors_sec.offset + sectors_sec.size) as usize;
238
239        let chunk_size = u64::from(geom.sectors_per_chunk) * u64::from(geom.bytes_per_sector);
240        let total_media_bytes = geom.sector_count * u64::from(geom.bytes_per_sector);
241        let mut bytes_remaining = total_media_bytes;
242
243        let mut hasher = Md5::new();
244
245        for i in 0..entry_count {
246            if bytes_remaining == 0 {
247                break;
248            }
249
250            let entry_off = entries_start + (i as usize) * 4;
251            if entry_off + 4 > data.len() {
252                return;
253            }
254            let raw = u32::from_le_bytes(data[entry_off..entry_off + 4].try_into().unwrap());
255            let is_compressed = raw & 0x8000_0000 != 0;
256            let chunk_rel = u64::from(raw & 0x7FFF_FFFF);
257            let chunk_abs_start = match base_offset.checked_add(chunk_rel) {
258                Some(abs) if (abs as usize) <= data.len() => abs as usize,
259                _ => return,
260            };
261
262            // End of this chunk's on-disk data = start of next chunk (or sectors body end).
263            let chunk_abs_end = if i + 1 < entry_count {
264                let next_off = entries_start + (i as usize + 1) * 4;
265                if next_off + 4 > data.len() {
266                    return;
267                }
268                let next_raw = u32::from_le_bytes(data[next_off..next_off + 4].try_into().unwrap());
269                let next_rel = u64::from(next_raw & 0x7FFF_FFFF);
270                match base_offset.checked_add(next_rel) {
271                    Some(abs) if (abs as usize) <= data.len() => abs as usize,
272                    _ => return,
273                }
274            } else {
275                sectors_body_end.min(data.len())
276            };
277
278            if chunk_abs_start >= chunk_abs_end {
279                return;
280            }
281
282            let chunk_data = &data[chunk_abs_start..chunk_abs_end];
283            // Bytes to feed to the hasher from this chunk (last chunk may be partial).
284            let to_hash = bytes_remaining.min(chunk_size) as usize;
285
286            if is_compressed {
287                // Deflate bomb guard: never decompress more than to_hash + 1 bytes.
288                let limit = (to_hash as u64).saturating_add(1);
289                let mut decompressed = Vec::with_capacity(to_hash);
290                if ZlibDecoder::new(chunk_data)
291                    .take(limit)
292                    .read_to_end(&mut decompressed)
293                    .is_err()
294                {
295                    return;
296                }
297                hasher.update(&decompressed[..decompressed.len().min(to_hash)]);
298            } else {
299                hasher.update(&chunk_data[..chunk_data.len().min(to_hash)]);
300            }
301
302            bytes_remaining = bytes_remaining.saturating_sub(to_hash as u64);
303        }
304
305        let computed: [u8; 16] = hasher.finalize().into();
306
307        let hash_body_start = (hash_sec.offset as usize) + SECTION_DESCRIPTOR_SIZE;
308        let stored_slice = match data.get(hash_body_start..hash_body_start + 16) {
309            Some(s) => s,
310            None => return,
311        };
312        let stored: [u8; 16] = stored_slice.try_into().unwrap();
313
314        if computed != stored {
315            issues.push(EwfIntegrityAnomaly::HashMismatch { computed, stored });
316        }
317    }
318
319    fn walk_sections(&self, issues: &mut Vec<EwfIntegrityAnomaly>) -> Vec<Section> {
320        let data = self.data;
321        let file_size = data.len() as u64;
322        let mut sections: Vec<Section> = Vec::new();
323        let mut pos = FILE_HEADER_SIZE as u64;
324
325        loop {
326            let off = pos as usize;
327            if off + SECTION_DESCRIPTOR_SIZE > data.len() {
328                break;
329            }
330
331            let desc = &data[off..off + SECTION_DESCRIPTOR_SIZE];
332
333            // Section type: NUL-terminated ASCII in first 16 bytes.
334            let type_end = desc[..16].iter().position(|&b| b == 0).unwrap_or(16);
335            let type_name = String::from_utf8_lossy(&desc[..type_end]).into_owned();
336
337            // Validate Adler-32 CRC over bytes [0..72].
338            let stored_crc = u32::from_le_bytes(desc[72..76].try_into().unwrap());
339            let computed_crc = adler32(&desc[..72]);
340            if computed_crc != stored_crc {
341                issues.push(EwfIntegrityAnomaly::SectionDescriptorCrcMismatch {
342                    offset: pos,
343                    section_type: type_name.clone(),
344                    computed: computed_crc,
345                    stored: stored_crc,
346                });
347            }
348
349            // Validate unknown section type.
350            if !KNOWN_TYPES.contains(&type_name.as_str()) {
351                issues.push(EwfIntegrityAnomaly::UnknownSectionType {
352                    offset: pos,
353                    type_name: type_name.clone(),
354                });
355            }
356
357            let next = u64::from_le_bytes(desc[16..24].try_into().unwrap());
358            let section_size = u64::from_le_bytes(desc[24..32].try_into().unwrap());
359            let section_end = pos.saturating_add(section_size);
360
361            sections.push(Section {
362                type_name: type_name.clone(),
363                offset: pos,
364                size: section_size,
365            });
366
367            // "done" terminates the chain (next == self).
368            if type_name == "done" {
369                break;
370            }
371
372            // Validate next pointer — must advance forward (no cycles or zero).
373            if next == 0 || next > file_size || next <= pos {
374                issues.push(EwfIntegrityAnomaly::SectionChainBroken {
375                    at_offset: pos,
376                    next_offset: next,
377                });
378                break;
379            }
380
381            // Gap between end of this section and start of next.
382            if next > section_end {
383                let gap_offset = section_end;
384                let gap_size = next - section_end;
385                // Only flag if gap bytes are non-zero.
386                let gap_start = gap_offset as usize;
387                let gap_end = next as usize;
388                let non_zero = data
389                    .get(gap_start..gap_end)
390                    .map(|s| s.iter().any(|&b| b != 0))
391                    .unwrap_or(false);
392                if non_zero {
393                    issues.push(EwfIntegrityAnomaly::SectionGapNonZero {
394                        gap_offset,
395                        gap_size,
396                    });
397                } else {
398                    issues.push(EwfIntegrityAnomaly::SectionGapZero {
399                        gap_offset,
400                        gap_size,
401                    });
402                }
403            }
404
405            pos = next;
406        }
407
408        sections
409    }
410
411    fn check_volume(
412        &self,
413        desc_offset: u64,
414        issues: &mut Vec<EwfIntegrityAnomaly>,
415    ) -> Option<VolumeGeometry> {
416        let data_start = (desc_offset as usize) + SECTION_DESCRIPTOR_SIZE;
417        let data = self.data;
418        if data.len() < data_start + VOLUME_DATA_MIN {
419            return None;
420        }
421        let vol = &data[data_start..];
422
423        let chunk_count = u32::from_le_bytes(vol[4..8].try_into().unwrap());
424        let sectors_per_chunk = u32::from_le_bytes(vol[8..12].try_into().unwrap());
425        let bytes_per_sector = u32::from_le_bytes(vol[12..16].try_into().unwrap());
426        let sector_count = u64::from_le_bytes(vol[16..24].try_into().unwrap());
427
428        if bytes_per_sector != 512 && bytes_per_sector != 4096 {
429            issues.push(EwfIntegrityAnomaly::BytesPerSectorInvalid { bytes_per_sector });
430        }
431
432        if sectors_per_chunk == 0 || !sectors_per_chunk.is_power_of_two() {
433            issues.push(EwfIntegrityAnomaly::ChunkSizeInvalid {
434                sectors_per_chunk,
435                bytes_per_sector,
436            });
437        }
438
439        // Valid range: last chunk may be partially filled.
440        // sector_count must satisfy: (chunk_count-1)*spc < sector_count <= chunk_count*spc.
441        // Only flag the impossible cases: too many sectors, or so few that a whole chunk
442        // is entirely unused (which would imply chunk_count was inflated).
443        let max_sectors = u64::from(chunk_count) * u64::from(sectors_per_chunk);
444        let min_sectors = max_sectors.saturating_sub(u64::from(sectors_per_chunk));
445        let out_of_range =
446            sector_count > max_sectors || (chunk_count > 0 && sector_count <= min_sectors);
447        if out_of_range && sectors_per_chunk.is_power_of_two() {
448            issues.push(EwfIntegrityAnomaly::SectorCountMismatch {
449                declared: sector_count,
450                expected: max_sectors,
451            });
452        }
453
454        Some(VolumeGeometry {
455            chunk_count,
456            sectors_per_chunk,
457            bytes_per_sector,
458            sector_count,
459        })
460    }
461
462    fn check_table(
463        &self,
464        desc_offset: u64,
465        volume_chunk_count: Option<u32>,
466        file_size: u64,
467        sectors_range: Option<(u64, u64)>,
468        issues: &mut Vec<EwfIntegrityAnomaly>,
469    ) {
470        let data_start = (desc_offset as usize) + SECTION_DESCRIPTOR_SIZE;
471        let data = self.data;
472        if data.len() < data_start + 24 {
473            return;
474        }
475        let tbl = &data[data_start..];
476
477        let entry_count = u32::from_le_bytes(tbl[0..4].try_into().unwrap());
478        let base_offset = u64::from_le_bytes(tbl[8..16].try_into().unwrap());
479
480        if let Some(vol_count) = volume_chunk_count {
481            if entry_count != vol_count {
482                issues.push(EwfIntegrityAnomaly::TableChunkCountMismatch {
483                    in_volume: vol_count,
484                    in_table: entry_count,
485                });
486            }
487        }
488
489        // Check each entry offset.
490        let entries_start = data_start + 24;
491        for i in 0..entry_count {
492            let entry_off = entries_start + (i as usize) * 4;
493            if entry_off + 4 > data.len() {
494                break;
495            }
496            let raw = u32::from_le_bytes(data[entry_off..entry_off + 4].try_into().unwrap());
497            let chunk_rel_offset = u64::from(raw & 0x7FFF_FFFF);
498            let absolute_offset = base_offset.saturating_add(chunk_rel_offset);
499            if absolute_offset >= file_size {
500                issues.push(EwfIntegrityAnomaly::TableEntryOutOfBounds {
501                    chunk_index: i,
502                    entry_offset: absolute_offset,
503                    file_size,
504                });
505            } else if let Some((sec_start, sec_end)) = sectors_range {
506                if absolute_offset < sec_start || absolute_offset >= sec_end {
507                    issues.push(EwfIntegrityAnomaly::TableEntryOutsideSectorsRange {
508                        chunk_index: i,
509                        entry_offset: absolute_offset,
510                        sectors_start: sec_start,
511                        sectors_end: sec_end,
512                    });
513                }
514            }
515        }
516    }
517}
518
/// One entry of the section chain, as read from its descriptor.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Section {
    /// Type name from the first 16 descriptor bytes (up to the first NUL).
    type_name: String,
    /// Absolute offset of the section descriptor in the buffer.
    offset: u64,
    /// Section size as declared in the descriptor (not validated).
    size: u64,
}
524
/// Media geometry parsed from the body of the volume/disk section.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct VolumeGeometry {
    /// Number of chunks declared by the volume section.
    chunk_count: u32,
    /// Sectors stored in each chunk.
    sectors_per_chunk: u32,
    /// Size of one sector in bytes.
    bytes_per_sector: u32,
    /// Total number of sectors declared for the media.
    sector_count: u64,
}
531
/// Computes the Adler-32 checksum of `data`.
///
/// Two running sums are kept modulo 65521: the low half starts at 1 and adds
/// each byte, the high half starts at 0 and adds the updated low half. The
/// result packs the high sum into the upper 16 bits and the low sum into the
/// lower 16 bits.
pub(crate) fn adler32(data: &[u8]) -> u32 {
    const MOD: u32 = 65521;
    let (low, high) = data.iter().fold((1u32, 0u32), |(low, high), &byte| {
        let low = (low + u32::from(byte)) % MOD;
        (low, (high + low) % MOD)
    });
    (high << 16) | low
}